Consistent ensemble predictions of intertidal barnacle distributions across visual and eDNA-based occurrence data

Author

Simons D-L, Webb TJ, Spencer M, Mieszkowska N

Introduction

This pipeline replicates all analyses in Simons et al (2025) “Consistent ensemble predictions of intertidal barnacle distributions across visual and eDNA-based occurrence data”.

Set-up

Load packages

#clear environment
rm(list=ls())

# state packages
packages <- c("devtools",
              "qiime2R", #github download
              "microbiome", #github download
              "tidyverse",
              "vegan",
              "ggforce",
              "phyloseq",
              "cowplot",
              "geosphere",
              'lme4',
              "car",
              "sf",
              "pROC", #ROC and AUC for model validation 
              "ncdf4",
              "raster",
              "lubridate",
              "terra",
              "geodata",
              "predicts",
              "sdmpredictors",
              "tidyterra",
              "exactextractr",
              "arrow",
              "here",
              "emodnet.wfs", #github
              "biooracler",#github
              "biomod2",
              "dismo",
              "xgboost",
              "caret",
              "gam",
              "mda",
              "earth",
              "maxnet",
              "randomForest",
              "RODBC",
              "odbc") 

# Install any missing packages along with their dependencies
new.packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new.packages)) install.packages(new.packages, dependencies = TRUE)

# Load all packages
invisible(lapply(packages, library, character.only = TRUE))

Functions

snap_to_nearest_valid <- function(environment_stack, occurance_data) {
  
  # Select lon/lat columns using dplyr
  coords <- occurance_data %>% select(c(lon, lat))
  
  # Convert to SpatialPoints for raster extraction
  pts <- SpatialPoints(coords, proj4string = crs(environment_stack))
  
  # Extract raster values at occurrence points
  vals <- raster::extract(environment_stack, pts)
  
  # Identify points on any NA across layers
  na_points <- apply(vals, 1, function(x) any(is.na(x)))
  
  # Identify valid cells (no NAs in any layer)
  valid_cells <- Which(
    !is.na(environment_stack[[1]]) & !is.na(environment_stack[[2]]) & !is.na(environment_stack[[3]]),
    cells = TRUE
  )
  valid_xy <- xyFromCell(environment_stack, valid_cells)
  
  # Initialize corrected coordinates
  corrected_coords <- coords
  
  # Snap only points on NA cells
  if(any(na_points)) {
    nn <- FNN::get.knnx(valid_xy, coords[na_points, , drop = FALSE], k = 1)
    corrected_coords[na_points, ] <- valid_xy[nn$nn.index, ]
  }
  
  return(corrected_coords)
}
g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
}

Set seed

This is to make sure the random elements in models start from the same point every time. This will make our results reproducible. This is also done before any randomized element such as before each model.

seed.val.set = 42 # life, the universe, and everything

options(biomod2.parallel = FALSE) # forces everything to run sequentially, so seeds behave more predictably

Occurrence data

Import paired eDNA and MarClim occurrences

Let’s first read in our occurrences and metadata.

# occurance data
occurance_data <- read.csv(file = "Data/Occurrence_data/my_dataset/combined_methods_long_data.csv", row.names = 1)

# get lat long from meta data
metadata <- read.csv(file = "Data/Metadata/metadata.csv", na.strings = c(""))
metadata <- metadata %>%
  subset(select = c(localityID, exposure, decimalLongitude, decimalLatitude)) %>% unique() %>% na.omit()

# add lat long to occurance
occurance_data <- left_join(occurance_data, metadata)

# filter to only species of interest
species_list <- c(
  "Semibalanus balanoides",
  "Chthamalus montagui"
)

occurance_data_reduced <- occurance_data %>% filter(taxa %in% species_list)
str(occurance_data_reduced)
'data.frame':   116 obs. of  15 variables:
 $ taxa              : chr  "Chthamalus montagui" "Chthamalus montagui" "Chthamalus montagui" "Chthamalus montagui" ...
 $ localityID        : chr  "CN01" "CN02" "CN03" "CN04" ...
 $ count             : num  7 7 7 6 7 ...
 $ region            : chr  "Southwest England" "Southwest England" "Southwest England" "Southwest England" ...
 $ genus             : chr  "Chthamalus" "Chthamalus" "Chthamalus" "Chthamalus" ...
 $ site              : chr  "Lizard Point" "Sennen Cove" "Looe" "St Ives" ...
 $ phylum            : chr  "Arthropoda" "Arthropoda" "Arthropoda" "Arthropoda" ...
 $ method            : chr  "Visual" "Visual" "Visual" "Visual" ...
 $ pa                : int  1 1 1 1 1 0 0 0 0 0 ...
 $ num_samples       : int  NA NA NA NA NA NA NA NA NA NA ...
 $ num_samples_found : int  NA NA NA NA NA NA NA NA NA NA ...
 $ prop_samples_found: num  NA NA NA NA NA NA NA NA NA NA ...
 $ exposure          : chr  "Sheltered" "Moderatley exposed" "Moderatley exposed" "Exposed" ...
 $ decimalLongitude  : num  -5.21 -5.71 -4.46 -5.48 -4.99 ...
 $ decimalLatitude   : num  50 50.1 50.3 50.2 50.5 ...

Split into species of interest for combined and individual methods.

# Clean species names to use in object names (e.g., "Semibalanus balanoides" -> "semibalanus")
clean_name <- function(name) {
  tolower(gsub(" ", "_", name))
}

# Loop through each species and create the filtered data frames
for (species in species_list) {
  clean <- clean_name(species)
  
  assign(paste0(clean, "_obs_eDNA"),
         occurance_data_reduced %>%
           filter(taxa == species, method == "eDNA", pa == 1) %>%
           select(taxa, decimalLongitude, decimalLatitude) %>%
           rename(lat = decimalLatitude, lon = decimalLongitude, species = taxa))

  assign(paste0(clean, "_obs_visual"),
         occurance_data_reduced %>%
           filter(taxa == species, method == "Visual", pa == 1) %>%
           select(taxa, decimalLongitude, decimalLatitude) %>%
           rename(lat = decimalLatitude, lon = decimalLongitude, species = taxa))
}

# Check the data to make sure it loaded correctly
head(semibalanus_balanoides_obs_eDNA)
                 species     lon     lat
1 Semibalanus balanoides -5.7100 50.0780
2 Semibalanus balanoides -4.4580 50.3410
3 Semibalanus balanoides -5.4751 50.2178
4 Semibalanus balanoides -4.9850 50.5450
5 Semibalanus balanoides -0.2609 54.2158
6 Semibalanus balanoides -0.4079 54.3014
# Determine geographic extent of our data
# find general latitudinal and longitudinal boundaries

max_lat <- ceiling(max(occurance_data_reduced$decimalLatitude))
min_lat <- floor(min(occurance_data_reduced$decimalLatitude))
max_lon <- ceiling(max(occurance_data_reduced$decimalLongitude))
min_lon <- floor(min(occurance_data_reduced$decimalLongitude))

# Store boundaries in a single extent object
geographic_extent <- ext(x = c(min_lon, max_lon, min_lat, max_lat))
uk_extent <- ext(-11, 2, 49.5, 61)  # Westernmost to easternmost, southernmost to northernmost
uk_extent_match <- ext(-10.8,2,49.4,60.8)

# plot features
wrld <- world(path=".")

Import wider MarClim occurrences

MarClim has a broader range of sites than just the sites matched to our eDNA samples. We can use this as an evaluation data set because it contains close to true absence and presence for the sampled sites. We are going to stick to 2022 - 2023 period to keep things simple.

# read
marclim_broad <- read.csv(file = "Data/Occurrence_data/my_dataset/MarClim_SACFOR_22_23.csv")

# filter for just semibalanus and chthamalus
marclim_broad_sp <- marclim_broad %>%
  select(Semibalanus.balanoides, 
         Chthamalus.montagui, 
         Lat..WGS84., 
         Long..WGS84.)

#convert to PA
marclim_broad_sp_PA <- marclim_broad_sp %>%
  mutate(across(
    c(Semibalanus.balanoides, Chthamalus.montagui),
    ~ ifelse(. %in% c("NR", "NS", "?", ""), 0, 1)
  )) %>% na.omit()

# tidy labels
marclim_broad_sp_PA <- marclim_broad_sp_PA %>%
  rename(
    lat = Lat..WGS84.,
    lon = Long..WGS84.
  )

#only within uK extent
marclim_broad_sp_PA <- subset(marclim_broad_sp_PA,
  lon >= -10.8 & lon <= 2 &
  lat >= 49.72 & lat <= 60.8
)

# make individual species presence only datasets for maps
semibalanus_marclim_broad <- marclim_broad_sp_PA %>%
  select(-c(Chthamalus.montagui)) %>% 
  filter(Semibalanus.balanoides == 1) %>% 
  mutate(species = "Semibalanus balanoides") %>% 
  select(-c(Semibalanus.balanoides))

chthamalus_marclim_broad <- marclim_broad_sp_PA %>%
  select(-c(Semibalanus.balanoides)) %>% 
  filter(Chthamalus.montagui == 1) %>% 
  mutate(species = "Chthamalus montagui") %>% 
  select(-c(Chthamalus.montagui))

Import GBIF occurrences

We’ve downloaded previously (the # code) then re-imported to save time and memory.

# download all obs from 2000 onwards
minYear = 2000
#semibalanus_GBIF_obs <- geodata::sp_occurrence("semibalanus", "balanoides", geo =
                                                 #TRUE, ext = uk_extent)

#dups <- duplicated(semibalanus_GBIF_obs[, c('lon', 'lat')])
#semibalanus_GBIF_obs_nodups <- semibalanus_GBIF_obs[!dups, ]
#semibalanus_GBIF_obs_nodups_recent <- subset(semibalanus_GBIF_obs_nodups, year >= minYear)

# Seperate the baseline data (2000 - 2021)

baselineYears <- c(2000:2021)
#semibalanus_GBIF_base <- semibalanus_GBIF_obs_nodups_recent %>% 
  #filter(year %in% baselineYears,
         #country %in% c("United Kingdom", "Isle of Man")) %>%  #filter out Ireland
  #filter(!(lon > -8.2 & lon < -5.4 & lat > 54.0 & lat < 55.3)) # filter out

#write.csv(semibalanus_GBIF_base, file = paste("Data/Occurrence_data/GBIF/semibalanus_GBIF_base",minYear,".csv", sep = ""))

semibalanus_GBIF_base <- read.csv("Data/Occurrence_data/GBIF/semibalanus_GBIF_base2000.csv")

# 673 obs for this period

# Seperate the additional data (2022 - 2023)
#semibalanus_GBIF_additional <- semibalanus_GBIF_obs_nodups_recent %>% 
  #filter(year %in% c(2022, 2023),
         #country %in% c("United Kingdom", "Isle of Man")) %>%  #filter out Ireland
  #filter(!(lon > -8.2 & lon < -5.4 & lat > 54.0 & lat < 55.3)) # filter out Northern Ireland

#write.csv(semibalanus_GBIF_additional, file = paste("Data/Occurrence_data/GBIF/semibalanus_GBIF_additional_2022.csv", sep = ""))

semibalanus_GBIF_additional <- read.csv("Data/Occurrence_data/GBIF/semibalanus_GBIF_additional_2022.csv")

# obs for additonal observations
# download all obs from 2000 onwards
#chthamalus_GBIF_obs <- geodata::sp_occurrence("chthamalus", "montagui", geo =
                                                 #TRUE, ext = uk_extent)

#dups <- duplicated(chthamalus_GBIF_obs[, c('lon', 'lat')])
#chthamalus_GBIF_obs_nodups <- chthamalus_GBIF_obs[!dups, ]
#chthamalus_GBIF_obs_nodups_recent <- subset(chthamalus_GBIF_obs_nodups, year >= minYear)

# Seperate the baseline data (2000 - 2021)
#chthamalus_GBIF_base <- chthamalus_GBIF_obs_nodups_recent %>% 
  #filter(year %in% baselineYears,
         #country %in% c("United Kingdom", "Isle of Man")) %>%  #filter out Ireland
  #filter(!(lon > -8.2 & lon < -5.4 & lat > 54.0 & lat < 55.3)) # filter out

#write.csv(chthamalus_GBIF_base, file = paste("Data/Occurrence_data/GBIF/chthamalus_GBIF_base",minYear,".csv", sep = ""))

chthamalus_GBIF_base <- read.csv("Data/Occurrence_data/GBIF/chthamalus_GBIF_base2000.csv")

# 578 obs for this period

# Seperate the additional data (2022 - 2023)
#chthamalus_GBIF_additional <- chthamalus_GBIF_obs_nodups_recent %>% 
  #filter(year %in% c(2022, 2023),
         #country %in% c("United Kingdom", "Isle of Man")) %>%  #filter out Ireland
  #filter(!(lon > -8.2 & lon < -5.4 & lat > 54.0 & lat < 55.3)) # filter out Northern Ireland

#write.csv(chthamalus_GBIF_additional, file = paste("Data/Occurrence_data/GBIF/chthamalus_GBIF_additional_2022.csv", sep = ""))

chthamalus_GBIF_additional <- read.csv("Data/Occurrence_data/GBIF/chthamalus_GBIF_additional_2022.csv")

# obs for additonal observations

Create data sets for model

We need the following versions of our data for each species:

  1. MarClim alone (as the evaluation data set)

  2. GBIF base (2000 - 2021) alone

  3. GBIF base (2000 - 2021) + GBIF additional (2022 - 2023)

  4. GBIF base (2000 - 2021) + eDNA additional (2022 - 2023)

# MarClim alone - 218 obs
semibalanus_MarClim <- semibalanus_marclim_broad %>% 
  mutate(species = as.factor(species),
         source = "MarClim") %>%
  select(species, lat, lon, source)

# GBIF base (2000 - 2021) + GBIF (2022 - 2023) - 1649 obs
semibalanus_GBIF_base_clean <- semibalanus_GBIF_base %>% 
  mutate(species = as.factor(species),
         source = "GBIF_base") %>%
  select(species, lat, lon, source)

semibalanus_GBIF_baseline <- semibalanus_GBIF_base_clean

semibalanus_GBIF_additional_clean <- semibalanus_GBIF_additional %>% 
  mutate(species = as.factor(species),
         source = "GBIF_additional") %>%
  select(species, lat, lon, source)

semibalanus_GBIF_GBIF <- rbind(semibalanus_GBIF_base_clean, semibalanus_GBIF_additional_clean)

# GBIF base data (2000 - 2021) + eDNA (2022 - 2023) - 1631 obs
semibalanus_balanoides_obs_eDNA_clean <- semibalanus_balanoides_obs_eDNA %>% 
  mutate(species = as.factor(species),
         source = "eDNA") %>%
  select(species, lat, lon, source)

semibalanus_GBIF_eDNA <- rbind(semibalanus_GBIF_base_clean, semibalanus_balanoides_obs_eDNA_clean)

# we can make a full version for our map
semibalanus_all_sources <- rbind(
  semibalanus_MarClim,
  semibalanus_GBIF_base_clean,
  semibalanus_GBIF_additional_clean,
  semibalanus_balanoides_obs_eDNA_clean
)

Let’s check to make sure they worked correctly.

dim(semibalanus_MarClim) # marclim alone
[1] 218   4
dim(semibalanus_GBIF_baseline) # GBIF baseline
[1] 1611    4
dim(semibalanus_GBIF_GBIF) #GBIF base + GBIF
[1] 1649    4
dim(semibalanus_GBIF_eDNA) #GBIF base + eDNA
[1] 1631    4
dim(semibalanus_all_sources) # all
[1] 1887    4

Same again for chthamalus.

# MarClim alone - 145 obs
chthamalus_MarClim <- chthamalus_marclim_broad %>% 
  mutate(species = as.factor(species),
         source = "MarClim") %>%
  select(species, lat, lon, source)

# GBIF base (2000 - 2021) + GBIF (2022 - 2023) - 605 obs
chthamalus_GBIF_base_clean <- chthamalus_GBIF_base %>% 
  mutate(species = as.factor(species),
         source = "GBIF_base") %>%
  select(species, lat, lon, source)

chthamalus_GBIF_baseline <- chthamalus_GBIF_base_clean

chthamalus_GBIF_additional_clean <- chthamalus_GBIF_additional %>% 
  mutate(species = as.factor(species),
         source = "GBIF_additional") %>%
  select(species, lat, lon, source)

chthamalus_GBIF_GBIF <- rbind(chthamalus_GBIF_base_clean, chthamalus_GBIF_additional_clean) 

# GBIF base data (2000 - 2021) + eDNA (2022 - 2023) - 589 obs
chthamalus_montagui_obs_eDNA_clean <- chthamalus_montagui_obs_eDNA %>% 
  mutate(species = as.factor(species),
         source = "eDNA") %>%
  select(species, lat, lon, source)

chthamalus_GBIF_eDNA <- rbind(chthamalus_GBIF_base_clean, chthamalus_montagui_obs_eDNA_clean) 

# we can make a full version for our map
chthamalus_all_sources <- rbind(
  chthamalus_MarClim,
  chthamalus_GBIF_base_clean,
  chthamalus_GBIF_additional_clean,
  chthamalus_montagui_obs_eDNA_clean
)

Let’s check to make sure they worked correctly.

dim(chthamalus_MarClim) # marclim alone
[1] 145   4
dim(chthamalus_GBIF_baseline) #GBIF baseline
[1] 578   4
dim(chthamalus_GBIF_GBIF) #GBIF base + GBIF
[1] 605   4
dim(chthamalus_GBIF_eDNA) #GBIF base + eDNA
[1] 589   4
dim(chthamalus_all_sources) # all
[1] 761   4

Visualize occurrences

Let’s plot our occurrences on a map.

# Get UK boundaries at admin level 1 (countries/subunits)
uk <- rnaturalearth::ne_countries(country = c("United Kingdom"), returnclass = "sf", scale = "large")

northern_ireland <- rnaturalearth::ne_states(country = "United Kingdom", returnclass = "sf") %>%
  filter(geonunit == "Northern Ireland")   %>% 
  st_union() %>%      # merge all the small pieces
  st_as_sf()          # back to sf object

other_maps <- rnaturalearth::ne_countries(country = c("Ireland", "Isle of Man", "France"), returnclass = "sf", scale = "large")
# colours
GBIF_colour = "#C1292E"
MarClim_colour = "darkgreen"
eDNA_colour= "#235789"
GBIF_base_colour = "#FDE235"
semibalanus_map <- ggplot() +
  geom_sf(data = uk, fill = "grey98", color = "grey") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  # Add all points except MarClim
  geom_jitter(data = subset(semibalanus_all_sources, source != "MarClim"), 
              aes(x = lon, y = lat, color = source, shape = source), 
              size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
  
  coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
  theme_minimal() +
  labs(x = "Longitude", y = "Latitude", color = "Data source") +
  scale_color_manual(values = c(
    "GBIF_additional" = GBIF_colour,
    "eDNA" = eDNA_colour,
    "GBIF_base" = GBIF_base_colour
  )) +
  scale_shape_manual(values = c(
    "GBIF_additional" = 17,  # solid triangle 
    "eDNA" = 15,             # solid square
    "GBIF_base" = 19)) +  
  theme(legend.position = "none")

semibalanus_map

chthamalus_map <- ggplot() +
  geom_sf(data = uk, fill = "grey98", color = "grey") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  # Add all points except MarClim
  geom_jitter(data = subset(chthamalus_all_sources, source != "MarClim"), 
              aes(x = lon, y = lat, color = source, shape = source), 
              size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
  
  coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
  theme_minimal() +
  labs(x = "Longitude", y = "Latitude", color = "Data source") +
  scale_color_manual(values = c(
    "GBIF_additional" = GBIF_colour,
    "eDNA" = eDNA_colour,
    "GBIF_base" = GBIF_base_colour
  )) +
  scale_shape_manual(values = c(
    "GBIF_additional" = 17,  # solid triangle 
    "eDNA" = 15,             # solid square
    "GBIF_base" = 19)) +  
  theme(legend.position = "none")

chthamalus_map

chthamalus_map_legend <- ggplot() +
  geom_sf(data = uk, fill = "grey98", color = "grey") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  # Add all points except MarClim
  geom_jitter(data = subset(chthamalus_all_sources, source != "MarClim"), 
              aes(x = lon, y = lat, color = source, shape = source), 
              size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
  
  coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
  theme_minimal() +
  labs(x = "Longitude", y = "Latitude", color = "Data source", shape = "Data source") +
  scale_color_manual(
    values = c(
      "GBIF_additional" = GBIF_colour,
      "eDNA" = eDNA_colour,
      "GBIF_base" = GBIF_base_colour
    ),
    labels = c(
      "GBIF_additional" = "GBIF additional",
      "eDNA" = "eDNA",
      "GBIF_base" = "GBIF baseline"
    )
  ) +
  scale_shape_manual(
    values = c(
      "GBIF_additional" = 17,  # solid triangle 
      "eDNA" = 15,             # solid square
      "GBIF_base" = 19         # solid circle
    ),
    labels = c(
      "GBIF_additional" = "GBIF additional",
      "eDNA" = "eDNA",
      "GBIF_base" = "GBIF baseline"
    )
  ) +
  theme(legend.position = "bottom")

legend <- g_legend(chthamalus_map_legend)
joint_map <- cowplot::plot_grid(
  cowplot::plot_grid(
    semibalanus_map,
    chthamalus_map,
    ncol = 2,
    labels = c("a", "b"),
    rel_heights = c(0.8, 1)
  ),
  legend,
  ncol = 1,
  rel_heights = c(1, 0.06)
)

joint_map

# Save the plot
ggsave(joint_map, filename = "Figures/chthamalus_semibalanus_map.png", width = 9, height = 7, dpi = 300)
semibalanus_map_eval <- ggplot() +
  geom_sf(data = uk, fill = "grey98", color = "grey") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  # Add all points except MarClim
  geom_jitter(data = subset(semibalanus_all_sources, source == "MarClim"), 
              aes(x = lon, y = lat, color = source, shape = source), 
              size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
  
  coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
  theme_minimal() +
  labs(x = "Longitude", y = "Latitude", color = "Data source") +
  scale_color_manual(values = c(
    "MarClim" = MarClim_colour
  )) +
  theme(legend.position = "none")

semibalanus_map_eval

chthamalus_map_eval <- ggplot() +
  geom_sf(data = uk, fill = "grey98", color = "grey") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  # Add all points except MarClim
  geom_jitter(data = subset(chthamalus_all_sources, source == "MarClim"), 
              aes(x = lon, y = lat, color = source, shape = source), 
              size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
  
  coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
  theme_minimal() +
  labs(x = "Longitude", y = "Latitude", color = "Data source") +
  scale_color_manual(values = c(
    "MarClim" = MarClim_colour
  )) +
  theme(legend.position = "none")

chthamalus_map_eval

Environmental data

It’s time to get environmental variables of interest. We have a few options here - let’s explore them.

# set uk extent and colours
uk_extent <- extent(-8, 2, 49.7, 61)
cols <- colorRampPalette(c("blue", "lightblue", "yellow", "red"))(100)

Temp, pH, coastline (Bio-ORCLE)

Bio-ORACLE is a comprehensive data set providing a wide range of marine environmental variables tailored for ecological and bio-geographical analysis. These variables include surface and benthic values for parameters such as temperature, salinity, nutrients, oxygen, pH, and more, derived from satellite and modeled oceanographic data.

We use the biooracler package here to obtain our environmental variables of interest from current and projected periods.

## ocean temp
ocean_temp_present = readRDS("Data/Environmental_data/BIO-ORACLE/thetao_baseline_2000_2019_depthsurf.rds")
plot(
  ocean_temp_present,
  col = cols
)

ocean_temp_avg <- mean(ocean_temp_present)
ocean_temp_avg
class       : SpatRaster 
dimensions  : 227, 201, 1  (nrow, ncol, nlyr)
resolution  : 0.05, 0.05000001  (x, y)
extent      : -8.05, 2, 49.7, 61.05  (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (CRS84) (OGC:CRS84) 
source(s)   : memory
name        :      mean 
min value   :  9.076521 
max value   : 13.317162 
ocean_temp_projected = readRDS("Data/Environmental_data/BIO-ORACLE/thetao_ssp585_2020_2100_depthsurf.rds")
plot(
  ocean_temp_projected,
  col = cols
)

ocean_temp_projected_90 = ocean_temp_projected["thetao_mean_8"]

## air temp
air_temp_present = readRDS("Data/Environmental_data/BIO-ORACLE/tas_baseline_2000_2020_depthsurf.rds")
plot(
  air_temp_present,
  col = cols
)

air_temp_projected = readRDS("Data/Environmental_data/BIO-ORACLE/tas_ssp585_2020_2100_depthsurf.rds")
plot(
  air_temp_projected,
  col = cols
)

## ph
ph_present = readRDS("Data/Environmental_data/BIO-ORACLE/ph_baseline_2000_2018_depthsurf.rds")
plot(ph_present,
     col = cols)

ph_avg <- mean(ph_present)
ph_avg
class       : SpatRaster 
dimensions  : 227, 201, 1  (nrow, ncol, nlyr)
resolution  : 0.05, 0.05000001  (x, y)
extent      : -8.05, 2, 49.7, 61.05  (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (CRS84) (OGC:CRS84) 
source(s)   : memory
name        :     mean 
min value   : 8.025923 
max value   : 8.116090 
ph_projected = readRDS("Data/Environmental_data/BIO-ORACLE/ph_ssp585_2020_2100_depthsurf.rds")
plot(ph_projected,
     col = cols)

ph_projected_90 <- ph_projected["ph_mean_8"]

# terrain
terrain <- readRDS("Data/Environmental_data/BIO-ORACLE/terrain_characteristics.rds")
plot(terrain,
     main = "Terrain")

Rocky substrate (JNCC)

Let’s try the JNCC UKASH Mosaic of Localized Maps which does include intertidal data, but only for 12% of the UK.

# Path to the unzipped .gdb folder for UKASH Mosaic of Localised Maps (with intertidal zone)
gdb_path <- "Data/Environmental_data/JNCC/UKASH_MosaicOfLocalisedMaps_v2025.gdb"

# List available layers
st_layers(gdb_path)
Driver: OpenFileGDB 
Available layers:
                         layer_name geometry_type features fields crs_name
1 UKASH_MosaicLocalisedMaps_v2025_1 Multi Polygon   899864     20   WGS 84
# read in
ukash <- st_read(gdb_path, layer = "UKASH_MosaicLocalisedMaps_v2025_1")
Reading layer `UKASH_MosaicLocalisedMaps_v2025_1' from data source 
  `C:\Users\dlsimons\Github_Repositories\intertidal-eDNA-SDMs-analyses\Data\Environmental_data\JNCC\UKASH_MosaicOfLocalisedMaps_v2025.gdb' 
  using driver `OpenFileGDB'
Simple feature collection with 899864 features and 20 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -14.40942 ymin: 48.11506 xmax: 2.89474 ymax: 60.86268
Geodetic CRS:  WGS 84
# explore
print(ukash)
Simple feature collection with 899864 features and 20 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -14.40942 ymin: 48.11506 xmax: 2.89474 ymax: 60.86268
Geodetic CRS:  WGS 84
First 10 features:
   SNCB_UID EMODnetGUI  OrigCode
1             GB001446 M.AtLB.Ro
2             GB001446 M.AtLB.Ro
3             GB001446 M.AtLB.Ro
4             GB001446 M.AtLB.Ro
5             GB001446 M.AtLB.Ro
6             GB001446 M.AtLB.Ro
7             GB001446 M.AtLB.Ro
8             GB001446 M.AtLB.Ro
9             GB001446 M.AtLB.Ro
10            GB001446 M.AtLB.Ro
                                               OrigName
1  Atlantic lower bathyal rock and other hard substrata
2  Atlantic lower bathyal rock and other hard substrata
3  Atlantic lower bathyal rock and other hard substrata
4  Atlantic lower bathyal rock and other hard substrata
5  Atlantic lower bathyal rock and other hard substrata
6  Atlantic lower bathyal rock and other hard substrata
7  Atlantic lower bathyal rock and other hard substrata
8  Atlantic lower bathyal rock and other hard substrata
9  Atlantic lower bathyal rock and other hard substrata
10 Atlantic lower bathyal rock and other hard substrata
                                                      OrigClass EUNISCode
1  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
2  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
3  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
4  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
5  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
6  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
7  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
8  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
9  Marine Habitat Classification for Britain and Ireland v22.04     A6.12
10 Marine Habitat Classification for Britain and Ireland v22.04     A6.12
   EUNISDetDate EUNISDetName EUNISTranR EUNISTranC   MHCCode
1          <NA>         <NA>          #            M.AtLB.Ro
2          <NA>         <NA>          #            M.AtLB.Ro
3          <NA>         <NA>          #            M.AtLB.Ro
4          <NA>         <NA>          #            M.AtLB.Ro
5          <NA>         <NA>          #            M.AtLB.Ro
6          <NA>         <NA>          #            M.AtLB.Ro
7          <NA>         <NA>          #            M.AtLB.Ro
8          <NA>         <NA>          #            M.AtLB.Ro
9          <NA>         <NA>          #            M.AtLB.Ro
10         <NA>         <NA>          #            M.AtLB.Ro
            MHCDetDate MHCDetName MHCTranR MHCTranC MESH_conf stp3_conf EUNISL3
1  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
2  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
3  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
4  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
5  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
6  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
7  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
8  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
9  2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
10 2023-09-02 01:00:00      Cefas        S                 NA        NA    A6.1
   SHAPE_Length   SHAPE_Area                          SHAPE
1   0.001357848 1.107135e-07 MULTIPOLYGON (((-9.821604 4...
2   0.001357856 1.107145e-07 MULTIPOLYGON (((-9.821615 4...
3   0.001357865 1.107158e-07 MULTIPOLYGON (((-9.821622 4...
4   0.001358004 1.107346e-07 MULTIPOLYGON (((-9.821765 4...
5   0.004487330 6.921092e-07 MULTIPOLYGON (((-9.821802 4...
6   0.001357906 1.107213e-07 MULTIPOLYGON (((-9.821665 4...
7   0.004352628 7.196127e-07 MULTIPOLYGON (((-9.821565 4...
8   0.006658651 1.134971e-06 MULTIPOLYGON (((-9.820716 4...
9   0.005838411 1.162602e-06 MULTIPOLYGON (((-9.82067 48...
10  0.009652807 2.573850e-06 MULTIPOLYGON (((-9.8215 48....
# filter for rocky only
rock_ukash <- ukash %>%
  filter(str_detect(str_to_lower(OrigName), "rock"))

# make a raster and match format to the other environmental variables
rock_ukash_rast <- raster::rasterize(rock_ukash, ocean_temp_present)

# plot
rock_ukash_plot <- ggplot(rock_ukash) +
  geom_sf(aes(fill = OrigName)) +
  scale_fill_viridis_d(option = "plasma", name = "Substrate Type") +
  coord_sf(crs = 4326) +
  theme_minimal() +
  labs(
    title = "UKASH Mosaic Localised Rock Habitat Map",
    x = "Longitude", y = "Latitude"
  ) + theme(legend.position = "none")

rock_ukash_plot

ggsave(rock_ukash_plot, filename = "Figures/UKASH_Mosaic_Localised_Rock.jpeg")

To get a better coverage, let’s import rocky habitat information from JNCC Marine Recorder Public UK snapshot.

Two Microsoft access databases (primary and secondary) are available at https://hub.jncc.gov.uk/assets/b9934e31-39b6-41f9-9364-d1e93db68307. The rocky habitat we are interested in are defined here - https://mhc.jncc.gov.uk/biotopes/jnccmncr00000003. Primary database contains locations, secondary contains biotypes.

# lists the drivers already installed for excel, access ect...
unique(odbc::odbcListDrivers()[[1]])
[1] "SQL Server"                                            
[2] "Microsoft Access Driver (*.mdb, *.accdb)"              
[3] "Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)"
[4] "Microsoft Access Text Driver (*.txt, *.csv)"           
[5] "Microsoft Access dBASE Driver (*.dbf, *.ndx, *.mdx)"   
# set database locations
primary_dbname <- "Data/Environmental_data/JNCC/SnapshotDatav52_PUBLIC_20220124.mdb"
secondary_dbname <- "Data/Environmental_data/JNCC//SnapshotDatav52_PUBLIC_20220124_secondary.mdb"

# Establish connections between R and Access
RODBC::odbcCloseAll() # clear any existing
primary_con <- RODBC::odbcConnectAccess2007(primary_dbname)
secondary_con <- RODBC::odbcConnectAccess2007(secondary_dbname)

RODBC::sqlTables(primary_con)
                                                             TABLE_CAT
1  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
2  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
3  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
4  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
5  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
6  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
7  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
8  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
9  Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
10 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
11 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
12 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
13 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
14 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
15 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
16 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
17 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
18 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
19 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
20 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
21 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
22 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
23 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
24 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
25 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
   TABLE_SCHEM                 TABLE_NAME   TABLE_TYPE REMARKS
1         <NA>               PhysicalData      SYNONYM    <NA>
2         <NA>           SampleBiotopeAll      SYNONYM    <NA>
3         <NA>       SampleBiotopeHistory      SYNONYM    <NA>
4         <NA>       SampleSpeciesHistory      SYNONYM    <NA>
5         <NA>          MSysAccessObjects SYSTEM TABLE    <NA>
6         <NA>              MSysAccessXML SYSTEM TABLE    <NA>
7         <NA>                   MSysACEs SYSTEM TABLE    <NA>
8         <NA> MSysNavPaneGroupCategories SYSTEM TABLE    <NA>
9         <NA>          MSysNavPaneGroups SYSTEM TABLE    <NA>
10        <NA>  MSysNavPaneGroupToObjects SYSTEM TABLE    <NA>
11        <NA>       MSysNavPaneObjectIDs SYSTEM TABLE    <NA>
12        <NA>                MSysObjects SYSTEM TABLE    <NA>
13        <NA>                MSysQueries SYSTEM TABLE    <NA>
14        <NA>          MSysRelationships SYSTEM TABLE    <NA>
15        <NA>                   Location        TABLE    <NA>
16        <NA>                  Reference        TABLE    <NA>
17        <NA>                     Sample        TABLE    <NA>
18        <NA>            SampleReplicate        TABLE    <NA>
19        <NA>              SampleSpecies        TABLE    <NA>
20        <NA>            SelectedObjects        TABLE    <NA>
21        <NA>                 Selections        TABLE    <NA>
22        <NA>                     Survey        TABLE    <NA>
23        <NA>                SurveyEvent        TABLE    <NA>
24        <NA>            SurveyReference        TABLE    <NA>
25        <NA>                    Version        TABLE    <NA>
RODBC::sqlTables(secondary_con)
                                                                         TABLE_CAT
1  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
2  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
3  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
4  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
5  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
6  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
7  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
8  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
9  Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
10 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
11 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
12 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
13 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
14 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
15 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
   TABLE_SCHEM                 TABLE_NAME   TABLE_TYPE REMARKS
1         <NA>          MSysAccessObjects SYSTEM TABLE    <NA>
2         <NA>              MSysAccessXML SYSTEM TABLE    <NA>
3         <NA>                   MSysACEs SYSTEM TABLE    <NA>
4         <NA> MSysNavPaneGroupCategories SYSTEM TABLE    <NA>
5         <NA>          MSysNavPaneGroups SYSTEM TABLE    <NA>
6         <NA>  MSysNavPaneGroupToObjects SYSTEM TABLE    <NA>
7         <NA>       MSysNavPaneObjectIDs SYSTEM TABLE    <NA>
8         <NA>                MSysObjects SYSTEM TABLE    <NA>
9         <NA>                MSysQueries SYSTEM TABLE    <NA>
10        <NA>          MSysRelationships SYSTEM TABLE    <NA>
11        <NA>               PhysicalData        TABLE    <NA>
12        <NA>           SampleBiotopeAll        TABLE    <NA>
13        <NA>       SampleBiotopeHistory        TABLE    <NA>
14        <NA>       SampleSpeciesHistory        TABLE    <NA>
15        <NA>                    Version        TABLE    <NA>
# Fetch the data from the SampleBiotopeAll table
locations <- RODBC::sqlFetch(primary_con, "Location", rownames = T)
SampleBiotopeAll <- RODBC::sqlFetch(secondary_con, "SampleBiotopeAll", rownames = T)

# Pull all the Biotope occurrence keys littoral habitats
Biotope_littoral <- SampleBiotopeAll %>%
  dplyr::filter(grepl("LR", BiotopeCode))  # matches any LR entries (includes eulittoral)

# join
biotype_location <- left_join(Biotope_littoral, locations, by = "ObjectId")
biotype_location_reduced <- biotype_location %>% select(ObjectId, BiotopeCode, Lat, Long, LatWGS84, LongWGS84, Qualifier, CoordinateSystem) %>% na.omit()

# remove uncertain 
biotype_location_reduced_certain <- biotype_location_reduced %>%
  dplyr::filter(!grepl("Uncertain", Qualifier))

#visualise - looks like a some points are incorrect
rocky_map <- ggplot() +
  geom_sf(data = wrld, fill = "lightyellow", color = "lightgray") +
  
  # Add all points with color mapped to 'source'
  geom_jitter(data = biotype_location_reduced, 
              aes(x = LongWGS84, y = LatWGS84, colour = Qualifier), 
              size = 0.8, width = 0.025, height = 0.025) +
  coord_sf(xlim = c(-11, 3), ylim = c(49, 61)) +
  theme_minimal() +
  labs(x = "Longitude", y = "Latitude") #+
  theme(legend.position = "none")
List of 1
 $ legend.position: chr "none"
 - attr(*, "class")= chr [1:2] "theme" "gg"
 - attr(*, "complete")= logi FALSE
 - attr(*, "validate")= logi TRUE
rocky_map

ggsave(rocky_map, filename = "Figures/JNCC_snapshot_littoral_map.jpeg")
# Convert raster coastline (value=1 for land) to polygons
coast_poly <- as.polygons(terrain, dissolve = TRUE, values = TRUE)

# Keep only the polygons where coastline == 1
coast_poly <- coast_poly[coast_poly$coastline == 1, ]

# Buffer (in degrees — ~0.01° ≈ 1.1 km at equator)
buffer_dist <- 0.03  # adjust based on desired intertidal width
coast_poly_buffered <- terra::buffer(coast_poly, width = buffer_dist)

# Make a SpatVector from Lat/Long WGS84
biotype_vect <- vect(
  biotype_location_reduced,
  geom = c("LongWGS84", "LatWGS84"), 
  crs = "OGC:CRS84"
)

# Intersect points with buffered polygon
biotype_onshore_vect <- terra::intersect(biotype_vect, coast_poly_buffered)

# Convert points and polygon to sf
biotype_sf <- st_as_sf(biotype_onshore_vect)
coast_sf   <- st_as_sf(coast_poly_buffered)

# Plot
jncc_snapshot_littorial_tidy_map <- ggplot() +
  geom_sf(data = coast_sf, fill = "lightgray", color = "lightblue") +
  geom_sf(data = biotype_sf, size = 1, color = "orange") +
  coord_sf(xlim = c(-11, 3), ylim = c(49, 61)) + # adjust to your study area
  theme_minimal() +
  theme(legend.position = "none")

jncc_snapshot_littorial_tidy_map

ggsave(jncc_snapshot_littorial_tidy_map, filename = "Figures/jncc_snapshot_littorial_tidy_map.jpeg", width = 5, height= 8)

Let’s combined the two JNCC sources so we have the best cover for our environmental rocky variable.

# Ensure CRS matches
biotype_sf <- st_transform(biotype_sf, crs(rock_ukash_rast))

# Keep biotype_sf for later use (this is your vector dataset)

# Convert sf to SpatVector for rasterising
biotype_vect <- vect(biotype_sf)

# Create a template raster
rocky_template <- rast(rock_ukash_rast)

# Rasterise points
rocky_rast <- rasterize(biotype_vect, rocky_template, field = 1, background = NA)

# Combine with existing rock raster
combined_rock_rast <- cover(rocky_rast, rock_ukash_rast)

# add in missing bits (highlands coast, wales)

# top coast highlands
north_coast_ext <- ext(-5.5, -2.5, 58.2, 58.9)
r <- terrain
r_crop <- crop(r, north_coast_ext)
r_crop <- resample(r_crop, combined_rock_rast, method = "near")
combined_rock_rast <- cover(combined_rock_rast, r_crop)

# wales
wales_ext <- ext(-5.5, -4.4,  51.5, 52.2)
r_crop2 <- crop(r, wales_ext)
r_crop2 <- resample(r_crop2, combined_rock_rast, method = "near")
combined_rock_rast <- cover(combined_rock_rast, r_crop2)

# big scotland islands
isles_ext <- ext(-7.8, -5.1,  54.7, 58.7)
r_crop3 <- crop(r, isles_ext)
r_crop3 <- resample(r_crop3, combined_rock_rast, method = "near")
combined_rock_rast <- cover(combined_rock_rast, r_crop3)

# Plot
plot(combined_rock_rast, main = "Rocky substrate (binary)", col = "grey10")

res(combined_rock_rast) #0.05 x 0.05
[1] 0.05000000 0.05000001
# expand radius
# Calculate distance from rocky areas (in map units, e.g., meters)
dist_rast <- distance(combined_rock_rast)

|---------|---------|---------|---------|
=========================================
                                          
# Keep only cells within 5 km
expanded_rock <- dist_rast <= 8000

# Convert to binary (1 = rocky/within 1 km, NA = non-rocky)
expanded_rock[expanded_rock == 0] <- NA
expanded_rock[expanded_rock == 1] <- 1

plot(expanded_rock, main = "Expanded rocky substrate", col = "grey10")

expanded_rock_coast_only <- mask(expanded_rock, terrain)

# Convert combined raster back to sf polygons if you want an sf version
combined_rock_sf <- st_as_sf(as.polygons(combined_rock_rast, dissolve = TRUE, values = TRUE))

# keep 1 as 1, set everything else to NA
rock01 <- ifel(combined_rock_rast == 1, 1, NA)

# polygonize only the 1s
pol1 <- as.polygons(rock01, dissolve = TRUE, na.rm = TRUE)
combined_rock_sf <- st_as_sf(pol1)

# optional cleanup
combined_rock_sf <- sf::st_make_valid(combined_rock_sf)
combined_rock_sf <- sf::st_cast(combined_rock_sf, "MULTIPOLYGON")

# make a raster and match format to the other environmental variables
rock_rast_combined <- raster::rasterize(combined_rock_sf, ocean_temp_present)

Wave fetch (Burrows 2012)

2012 wave fetch data is downloaded from https://figshare.com/articles/dataset/Wave_fetch_GIS_layers_for_the_UK_and_Ireland_at_200m_scale/12029682?file=22107477.

Values represent the log base 10 of the summed distance to the nearest land (as the number of 200m grid cell units) across all 32 11.5° sectors. This is the data we’ll use for our models. Higher values mean more exposed, lower values mean less exposed.

# load and plot
fetch_2012 <- rast("Data/Environmental_data/log10_eu200m1a.tif")
plot(fetch_2012)

# Reproject fetch to WGS84 (same CRS as ocean_temp)
fetch_wgs84 <- project(fetch_2012, ocean_temp_present)

# Resample to match ocean_temp grid (interpolates to same cells)
fetch_resampled <- resample(fetch_wgs84, ocean_temp_present, method = "bilinear")
plot(fetch_resampled)

Create stacks

# stack
enviro_vars_current <- stack(c(
  ocean_temp_avg,
  ph_avg,
  fetch_resampled
))

names(enviro_vars_current) <- c(
  "ocean_temp",
  "ph",
  "wave_fetch"
  )

names(enviro_vars_current)
[1] "ocean_temp" "ph"         "wave_fetch"
res(enviro_vars_current) #  0.05000000 x 0.05000001
[1] 0.05000000 0.05000001
#plot
png(
  "Figures/enviro_vars_current.png",
  width = 2000,
  height = 2000,
  res = 300
)

plot(enviro_vars_current,
     main = c("Sea Surface Temperature (SST)", "pH", "Wave fetch"))

dev.off()
png 
  2 
# future


enviro_vars_future <- stack(c(
  ocean_temp_projected_90,
  ph_projected_90,
  fetch_resampled
))

names(enviro_vars_future) <- c(
  "ocean_temp",
  "ph",
  "wave_fetch" # we need the future 
)

#plot
png(
  "Figures/enviro_vars_future.png",
  width = 2000,
  height = 2000,
  res = 300
)

plot(enviro_vars_future,
     main = c("Sea Surface Temperature (SST)", "pH", "Wave fetch"))

dev.off()
png 
  2 
# Combine both stacks into one for plot
enviro_both <- stack(enviro_vars_current, enviro_vars_future)
enviro_both <- dropLayer(enviro_both, nlayers(enviro_both))

# add in rocky layer
rock_rast_raster <- raster(rock_rast_combined)
names(rock_rast_raster) <- "rocky_substrate_binary"
enviro_both <- stack(enviro_both, rock_rast_raster)

#plot

# Save high-resolution plot
png(
  "Figures/enviro_vars.png",
  width = 2500,
  height = 2000,
  res = 300
)

plot(enviro_both,
     main = c("Sea surface temperature (Current)", 
              "pH (Current)", 
              "Wave fetch", 
              "Sea surface temperature (Future)", 
              "pH (Future)", 
              "Rocky substrate")
)

dev.off()
png 
  2 

Fix unmatched occurrence points to environmental data

We can use the extract function from the raster package to check occurrences and environmental data overlap. Some points are not matched to our environmental data (we’ve got quite a few NAs). We need to find the closest point which is classed as coastline and use that instead.

Semibalanus corrections

SB MarClim

# snap the valid environmental cells from function created earlier
semibalanus_corrected_marclim <- snap_to_nearest_valid(
  enviro_vars_current,
  semibalanus_MarClim)

# check
envir_terra <- rast(enviro_vars_current)
presvals_semibalanus_corrected_marclim <- extract(envir_terra, semibalanus_corrected_marclim) # this looks better

# remove the ID variable
presvals_semibalanus_corrected_marclim <- presvals_semibalanus_corrected_marclim[,-1]

# check the new data
head(semibalanus_corrected_marclim)
      lon     lat
1 -4.5592 50.8362
2 -2.3760 50.6330
3 -2.4600 50.5130
4 -2.4544 50.5414
5 -1.9450 50.6070
6 -2.4448 50.6062
# for the PA version
marclim_broad_sp_PA_semibalanus <- marclim_broad_sp_PA %>% select(-c("Chthamalus.montagui"))

semibalanus_corrected_marclim_PA <- snap_to_nearest_valid(
  enviro_vars_current,
  marclim_broad_sp_PA_semibalanus)

# snap the valid environmental cells from function created earlier
presvals_semibalanus_corrected_marclim_PA <- extract(envir_terra, semibalanus_corrected_marclim_PA) # this looks better

# remove the ID variable
presvals_semibalanus_corrected_marclim_PA <- presvals_semibalanus_corrected_marclim_PA[,-1]

# check the new data
head(semibalanus_corrected_marclim_PA)
      lon     lat
1 -4.5592 50.8362
2 -2.3760 50.6330
3 -2.4600 50.5130
4 -2.4544 50.5414
5 -1.9450 50.6070
6 -2.1250 50.5750
#add PA back in
semibalanus_corrected_marclim_PA$Semibalanus.balanoides = marclim_broad_sp_PA_semibalanus$Semibalanus.balanoides

SB baseline

# snap the valid environmental cells from function created earlier
semibalanus_corrected_gbif_baseline <- snap_to_nearest_valid(
  enviro_vars_current,
  semibalanus_GBIF_baseline)

# check
presvals_semibalanus_corrected_gbif_baseline <- extract(envir_terra, semibalanus_corrected_gbif_baseline[, c("lon", "lat")]) # this looks better

# remove the ID variable
presvals_semibalanus_corrected_gbif_baseline <- presvals_semibalanus_corrected_gbif_baseline[,-1]

# check the new data
head(semibalanus_corrected_gbif_baseline)
        lon      lat
1 -5.495591 50.21954
2 -5.025000 50.17500
3 -4.325000 53.12500
4 -5.061596 50.14580
5 -4.075000 50.27500
6 -5.475000 50.07500
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_semibalanus_corrected_gbif_baseline, 1, function(x) any(is.na(x)))

table(na_points) #no NAs
na_points
FALSE 
 1611 

SB GBIF + GBIF

# snap the valid environmental cells from function created earlier
semibalanus_corrected_gbif <- snap_to_nearest_valid(
  enviro_vars_current,
  semibalanus_GBIF_GBIF)

# check
presvals_semibalanus_corrected_gbif <- extract(envir_terra, semibalanus_corrected_gbif[, c("lon", "lat")]) # this looks better

# remove the ID variable
presvals_semibalanus_corrected_gbif <- presvals_semibalanus_corrected_gbif[,-1]

# check the new data
head(semibalanus_corrected_gbif)
        lon      lat
1 -5.495591 50.21954
2 -5.025000 50.17500
3 -4.325000 53.12500
4 -5.061596 50.14580
5 -4.075000 50.27500
6 -5.475000 50.07500
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_semibalanus_corrected_gbif, 1, function(x) any(is.na(x)))

table(na_points) #no NAs
na_points
FALSE 
 1649 

SB GBIF + eDNA

# snap the valid environmental cells from function created earlier
semibalanus_corrected_GBIF_eDNA <- snap_to_nearest_valid(
  enviro_vars_current,
  semibalanus_GBIF_eDNA)

# check
presvals_semibalanus_corrected_GBIF_eDNA <- extract(envir_terra, semibalanus_corrected_GBIF_eDNA[, c("lon", "lat")]) # this looks better

# remove the ID variable
presvals_semibalanus_corrected_GBIF_eDNA <- presvals_semibalanus_corrected_GBIF_eDNA[,-1]

# check the new data
head(semibalanus_corrected_GBIF_eDNA)
        lon      lat
1 -5.495591 50.21954
2 -5.025000 50.17500
3 -4.325000 53.12500
4 -5.061596 50.14580
5 -4.075000 50.27500
6 -5.475000 50.07500
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_semibalanus_corrected_GBIF_eDNA, 1, function(x) any(is.na(x)))

table(na_points) #no NAs
na_points
FALSE 
 1631 

Chthamalus corrections

CM MarClim

# snap the valid environmental cells from function created earlier
chthamalus_corrected_marclim <- snap_to_nearest_valid(
  enviro_vars_current,
  chthamalus_MarClim)

# check
presvals_chthamalus_corrected_marclim <- extract(envir_terra, chthamalus_corrected_marclim) # this looks better

# remove the ID variable
presvals_chthamalus_corrected_marclim <- presvals_chthamalus_corrected_marclim[,-1]

# check the new data
head(chthamalus_corrected_marclim)
      lon     lat
1 -4.5592 50.8362
2 -2.3760 50.6330
3 -2.4600 50.5130
4 -2.4544 50.5414
5 -1.9450 50.6070
6 -2.1250 50.5750
# for the PA version

# call function we created eariler to correct points
marclim_broad_sp_PA_chthamalus <- marclim_broad_sp_PA %>% select(-c("Semibalanus.balanoides"))

# snap the valid environmental cells from function created earlier
chthamalus_corrected_marclim_PA <- snap_to_nearest_valid(
  enviro_vars_current,
  marclim_broad_sp_PA_chthamalus)

presvals_chthamalus_corrected_marclim_PA <- extract(envir_terra, chthamalus_corrected_marclim_PA) # this looks better

# remove the ID variable
presvals_chthamalus_corrected_marclim_PA <- presvals_chthamalus_corrected_marclim_PA[,-1]

# check the new data
head(chthamalus_corrected_marclim_PA)
      lon     lat
1 -4.5592 50.8362
2 -2.3760 50.6330
3 -2.4600 50.5130
4 -2.4544 50.5414
5 -1.9450 50.6070
6 -2.1250 50.5750
#add PA back in
chthamalus_corrected_marclim_PA$Chthamalus.montagui = marclim_broad_sp_PA_chthamalus$Chthamalus.montagui

CM baseline

# snap the valid environmental cells from function created earlier
chthamalus_corrected_gbif_baseline <- snap_to_nearest_valid(
  enviro_vars_current,
  chthamalus_GBIF_baseline)

# check
presvals_chthamalus_corrected_gbif_baseline <- extract(envir_terra, chthamalus_corrected_gbif_baseline[, c("lon", "lat")]) # this looks better

# remove the ID variable
presvals_chthamalus_corrected_gbif_baseline <- presvals_chthamalus_corrected_gbif_baseline[,-1]

# check the new data
head(chthamalus_corrected_gbif_baseline)
        lon      lat
1 -5.025000 50.17500
2 -5.105353 50.41336
3 -5.495591 50.21954
4 -4.559459 50.79599
5 -5.475000 50.07500
6 -5.043342 50.14615
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_chthamalus_corrected_gbif_baseline, 1, function(x) any(is.na(x)))

table(na_points) #no NAs
na_points
FALSE 
  578 

CM GBIF + GBIF

# snap the valid environmental cells from function created earlier
chthamalus_corrected_gbif <- snap_to_nearest_valid(
  enviro_vars_current,
  chthamalus_GBIF_GBIF)

# check
presvals_chthamalus_corrected_gbif <- extract(envir_terra, chthamalus_corrected_gbif[, c("lon", "lat")]) # this looks better

# remove the ID variable
presvals_chthamalus_corrected_gbif <- presvals_chthamalus_corrected_gbif[,-1]

# check the new data
head(chthamalus_corrected_gbif)
        lon      lat
1 -5.025000 50.17500
2 -5.105353 50.41336
3 -5.495591 50.21954
4 -4.559459 50.79599
5 -5.475000 50.07500
6 -5.043342 50.14615
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_chthamalus_corrected_gbif, 1, function(x) any(is.na(x)))

table(na_points) #no Nas
na_points
FALSE 
  605 

CM GBIF + eDNA

# snap the valid environmental cells from function created earlier
chthamalus_corrected_GBIF_eDNA <- snap_to_nearest_valid(
  enviro_vars_current,
  chthamalus_GBIF_eDNA)

# check
presvals_chthamalus_corrected_GBIF_eDNA <- extract(envir_terra, chthamalus_corrected_GBIF_eDNA[, c("lon", "lat")]) # this looks better

# remove the ID variable
presvals_chthamalus_corrected_GBIF_eDNA <- presvals_chthamalus_corrected_GBIF_eDNA[,-1]

# check the new data
head(chthamalus_corrected_GBIF_eDNA)
        lon      lat
1 -5.025000 50.17500
2 -5.105353 50.41336
3 -5.495591 50.21954
4 -4.559459 50.79599
5 -5.475000 50.07500
6 -5.043342 50.14615
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_chthamalus_corrected_GBIF_eDNA, 1, function(x) any(is.na(x)))

table(na_points) #no NAs
na_points
FALSE 
  589 

biomod2 workflow

We are going to build our models and forecasts using the biomod2 package. We will build four models, visualize their outputs and forecast changes into the far future (2100).

Data formatting

For our models, the GBIF base + GBIF addition and GBIF base + eDNA addition data will be used to build the individual models, being split for example 10 times into 70/30 percentages to cross-validate them (which is defined in the next section).

The MarClim data will be used to evaluate all models (individual or ensemble) - this is defined when formatting the data in this section.

Data set 1 (Semibalanus - GBIF + GBIF)

# pa as df and tidy
# marclim
semibalanus_corrected_marclim_PA_df <- as.data.frame(semibalanus_corrected_marclim_PA)

# occurences
semibalanus_corrected_gbif_df <- as.data.frame(semibalanus_corrected_gbif)
semibalanus_corrected_gbif_df$Semibalanus.balanoides <- 1 #add p for gbif

# Extract env values
env_vals <- extract(envir_terra, semibalanus_corrected_gbif)

# Bind back to coordinates
pts_with_vals <- cbind(semibalanus_corrected_gbif_df, env_vals)

# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]

Now let’s put all our data elements into the BIOMOD_FormatingData function.

set.seed(seed.val.set)

myBiomodData_semibalanus_GBIF <- BIOMOD_FormatingData(
  resp.var = rep(1, nrow(pts_clean)),
  # presence-only
  expl.var = enviro_vars_current,
  resp.xy = pts_clean,
  resp.name = 'Semibalanus.balanoides',
  PA.nb.rep = 1,
  # Number of pseudo-absence sets (10 recom by biomd)
  PA.nb.absences = nrow(pts_clean),
  # Number of pseudo-absences (same number of presences)
  PA.strategy = 'random',
  eval.resp.var = semibalanus_corrected_marclim_PA_df$Semibalanus.balanoides,
  # Evaluation responses (0/1)
  eval.resp.xy = semibalanus_corrected_marclim_PA_df[, c("lon", "lat")],
  # Evaluation locations,
  eval.expl.var = presvals_semibalanus_corrected_marclim_PA,
  #filter.raster = TRUE #autofilter,
  na.rm = FALSE,
  seed.val = seed.val.set
)

summary(myBiomodData_semibalanus_GBIF)

# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_semibalanus_GBIF.png", 
    width = 2000, height = 1500, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_GBIF)
dev.off()

Data set 2 (Semibalanus - GBIF + eDNA)

# occurences
semibalanus_corrected_GBIF_eDNA_df <- as.data.frame(semibalanus_corrected_GBIF_eDNA)
semibalanus_corrected_GBIF_eDNA_df$Semibalanus.balanoides <- 1 #add p for gbif

# Extract env values
env_vals <- extract(envir_terra, semibalanus_corrected_GBIF_eDNA)

# Bind back to coordinates
pts_with_vals <- cbind(semibalanus_corrected_GBIF_eDNA_df, env_vals)

# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]

Now let’s put all our data elements into the BIOMOD_FormatingData function.

set.seed(seed.val.set)

myBiomodData_semibalanus_GBIF_eDNA <- BIOMOD_FormatingData(
  resp.var = rep(1, nrow(pts_clean)) ,
  # presence-only
  expl.var = enviro_vars_current,
  resp.xy = pts_clean,
  resp.name = 'Semibalanus.balanoides',
  PA.nb.rep = 1,
  # Number of pseudo-absence sets (10 recom by biomd)
  PA.nb.absences = nrow(pts_clean),
  # Number of pseudo-absences (same number of presences)
  PA.strategy = 'random',
  eval.resp.var = semibalanus_corrected_marclim_PA_df$Semibalanus.balanoides,
  # Evaluation responses (0/1)
  eval.resp.xy = semibalanus_corrected_marclim_PA_df[, c("lon", "lat")],
  # Evaluation locations,
  eval.expl.var = presvals_semibalanus_corrected_marclim_PA,
  #filter.raster = TRUE #autofilter
  na.rm = FALSE,
  seed.val = seed.val.set
)

summary(myBiomodData_semibalanus_GBIF_eDNA)

# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_semibalanus_GBIF_eDNA.png", 
    width = 2000, height = 1500, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_GBIF_eDNA)
dev.off()

Data set 3 (SB - baseline)

# occurences
semibalanus_corrected_gbif_baseline_df <- as.data.frame(semibalanus_corrected_gbif_baseline)
semibalanus_corrected_gbif_baseline_df$Semibalanus.balanoides <- 1 #add p for gbif

# Extract env values
env_vals <- extract(envir_terra, semibalanus_corrected_gbif_baseline)

# Bind back to coordinates
pts_with_vals <- cbind(semibalanus_corrected_gbif_baseline_df, env_vals)

# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]
set.seed(seed.val.set)

myBiomodData_semibalanus_baseline <- BIOMOD_FormatingData(
  resp.var = rep(1, nrow(pts_clean)) ,
  # presence-only
  expl.var = enviro_vars_current,
  resp.xy = pts_clean,
  resp.name = 'Semibalanus.balanoides',
  PA.nb.rep = 1,
  # Number of pseudo-absence sets (10 recom by biomd)
  PA.nb.absences = nrow(pts_clean),
  # Number of pseudo-absences (same number of presences)
  PA.strategy = 'random',
  eval.resp.var = semibalanus_corrected_marclim_PA_df$Semibalanus.balanoides,
  # Evaluation responses (0/1)
  eval.resp.xy = semibalanus_corrected_marclim_PA_df[, c("lon", "lat")],
  # Evaluation locations,
  eval.expl.var = presvals_semibalanus_corrected_marclim_PA,
  #filter.raster = TRUE #autofilter
  na.rm = FALSE,
  seed.val = seed.val.set
)

summary(myBiomodData_semibalanus_baseline)

# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_semibalanus_baseline.png", 
    width = 2000, height = 1500, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_baseline)
dev.off()

Data set 4 (Chthamalus - GBIF + GBIF)

# pa as df and tidy
# marclim
chthamalus_corrected_marclim_PA_df <- as.data.frame(chthamalus_corrected_marclim_PA)

# occurences
chthamalus_corrected_gbif_df <- as.data.frame(chthamalus_corrected_gbif)
chthamalus_corrected_gbif_df$Chthamalus.montagui <- 1 #add p for gbif

# Extract env values
env_vals <- extract(envir_terra, chthamalus_corrected_gbif)

# Bind back to coordinates
pts_with_vals <- cbind(chthamalus_corrected_gbif_df, env_vals)

# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]
set.seed(seed.val.set)

myBiomodData_chthamalus_GBIF <- BIOMOD_FormatingData(
  resp.var = rep(1, nrow(pts_clean)) ,
  # presence-only
  expl.var = enviro_vars_current,
  resp.xy = pts_clean,
  resp.name = 'Chthamalus.montagui',
  PA.nb.rep = 1,
  # Number of pseudo-absence sets (10 recom by biomd)
  PA.nb.absences = nrow(pts_clean),
  # Number of pseudo-absences (same number of presences)
  PA.strategy = 'random',
  eval.resp.var = chthamalus_corrected_marclim_PA_df$Chthamalus.montagui,
  # Evaluation responses (0/1)
  eval.resp.xy = chthamalus_corrected_marclim_PA_df[, c("lon", "lat")],
  # Evaluation locations,
  eval.expl.var = presvals_chthamalus_corrected_marclim_PA,
  #filter.raster = TRUE #autofilter
  na.rm = FALSE,
  seed.val = seed.val.set
)

myBiomodData_chthamalus_GBIF

# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_chthamalus_GBIF.png", 
    width = 2000, height = 1500, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_GBIF)
dev.off()

Data set 5 (Chthamalus - GBIF + eDNA)

# occurences
chthamalus_corrected_GBIF_eDNA_df <- as.data.frame(chthamalus_corrected_GBIF_eDNA)
chthamalus_corrected_GBIF_eDNA_df$Chthamalus.montagui<- 1 #add p for gbif

# Extract env values
env_vals <- extract(envir_terra, chthamalus_corrected_GBIF_eDNA)

# Bind back to coordinates
pts_with_vals <- cbind(chthamalus_corrected_GBIF_eDNA_df, env_vals)

# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]
set.seed(seed.val.set)

myBiomodData_chthamalus_GBIF_eDNA <- BIOMOD_FormatingData(
  resp.var = rep(1, nrow(pts_clean)) ,
  # presence-only
  expl.var = enviro_vars_current,
  resp.xy = pts_clean,
  resp.name = 'Chthamalus.montagui',
  PA.nb.rep = 1,
  # Number of pseudo-absence sets (10 recom by biomd)
  PA.nb.absences = nrow(pts_clean),
  # Number of pseudo-absences (same number of presences)
  PA.strategy = 'random',
  eval.resp.var = chthamalus_corrected_marclim_PA_df$Chthamalus.montagui,
  # Evaluation responses (0/1)
  eval.resp.xy = chthamalus_corrected_marclim_PA_df[, c("lon", "lat")],
  # Evaluation locations,
  eval.expl.var = presvals_chthamalus_corrected_marclim_PA,
  #filter.raster = TRUE #autofilter
  na.rm = FALSE,
  seed.val = seed.val.set
)

myBiomodData_chthamalus_GBIF_eDNA

# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_chthamalus_GBIF_eDNA.png", 
    width = 2000, height = 1500, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_GBIF_eDNA)
dev.off()

Data set 6 (CM baseline)

# occurences
chthamalus_corrected_gbif_baseline_df <- as.data.frame(chthamalus_corrected_gbif_baseline)
chthamalus_corrected_gbif_baseline_df$Chthamalus.montagui <- 1 #add p for gbif

# Extract env values
env_vals <- extract(envir_terra, chthamalus_corrected_gbif_baseline)

# Bind back to coordinates
pts_with_vals <- cbind(chthamalus_corrected_gbif_baseline_df, env_vals)

# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]
set.seed(seed.val.set)

myBiomodData_chthamalus_baseline <- BIOMOD_FormatingData(
  resp.var = rep(1, nrow(pts_clean)) ,
  # presence-only
  expl.var = enviro_vars_current,
  resp.xy = pts_clean,
  resp.name = 'Chthamalus.montagui',
  PA.nb.rep = 1,
  # Number of pseudo-absence sets (10 recom by biomd)
  PA.nb.absences = nrow(pts_clean),
  # Number of pseudo-absences (same number of presences)
  PA.strategy = 'random',
  eval.resp.var = chthamalus_corrected_marclim_PA_df$Chthamalus.montagui,
  # Evaluation responses (0/1)
  eval.resp.xy = chthamalus_corrected_marclim_PA_df[, c("lon", "lat")],
  # Evaluation locations,
  eval.expl.var = presvals_chthamalus_corrected_marclim_PA,
  #filter.raster = TRUE #autofilter
  na.rm = FALSE,
  seed.val = seed.val.set
)

myBiomodData_chthamalus_baseline

# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_chthamalus_baseline.png", 
    width = 2000, height = 1500, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_baseline)
dev.off()

We now have six biomod objects ready for modelling.

#SB
summary(myBiomodData_semibalanus_baseline)
      dataset run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial  NA <NA>      1611             0               0      1611
2  evaluation  NA <NA>       218            18               0         0
3 calibration  NA  PA1      1611             0            1611        NA
summary(myBiomodData_semibalanus_GBIF)
      dataset run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial  NA <NA>      1649             0               0      1649
2  evaluation  NA <NA>       218            18               0         0
3 calibration  NA  PA1      1649             0            1649        NA
summary(myBiomodData_semibalanus_GBIF_eDNA)
      dataset run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial  NA <NA>      1631             0               0      1631
2  evaluation  NA <NA>       218            18               0         0
3 calibration  NA  PA1      1631             0            1631        NA
#CM
summary(myBiomodData_chthamalus_baseline)
      dataset run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial  NA <NA>       578             0               0       578
2  evaluation  NA <NA>       145            91               0         0
3 calibration  NA  PA1       578             0             578        NA
summary(myBiomodData_chthamalus_GBIF)
      dataset run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial  NA <NA>       605             0               0       605
2  evaluation  NA <NA>       145            91               0         0
3 calibration  NA  PA1       605             0             605        NA
summary(myBiomodData_chthamalus_GBIF_eDNA)
      dataset run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial  NA <NA>       589             0               0       589
2  evaluation  NA <NA>       145            91               0         0
3 calibration  NA  PA1       589             0             589        NA

Cross validation

Cross validation 1 (Semibalanus - GBIF + GBIF)

set.seed(seed.val.set)
cv.sv <- bm_CrossValidation(bm.format = myBiomodData_semibalanus_GBIF,
                            strategy = 'random',
                            nb.rep = 2,
                            perc = 0.7)


Checking Cross-Validation arguments...

   > Random cross-validation selection
summary(myBiomodData_semibalanus_GBIF, calib.lines = cv.sv)
      dataset  run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial <NA> <NA>      1649             0               0      1649
2  evaluation <NA> <NA>       218            18               0         0
3 calibration RUN1  PA1      1154             0            1154        NA
4  validation RUN1  PA1       495             0             495        NA
5 calibration RUN2  PA1      1154             0            1154        NA
6  validation RUN2  PA1       495             0             495        NA
# Save plot to PNG
png("Figures/myBiomodData_validation_semibalanus_visual.png", 
    width = 2000, height = 3000, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_GBIF, calib.lines = cv.sv)
$data.vect
 class       : SpatVector 
 geometry    : points 
 dimensions  : 8481, 2  (geometries, attributes)
 extent      : -8.025, 1.975, 49.725, 60.875  (xmin, xmax, ymin, ymax)
 coord. ref. :  
 names       :  resp         dataset
 type        : <num>           <chr>
 values      :    10 Initial dataset
                  10 Initial dataset
                  10 Initial dataset

$data.label
                              9                              10 
                "**Presences**"       "Presences (calibration)" 
                             11                              12 
       "Presences (validation)"        "Presences (evaluation)" 
                             19                              20 
            "**True Absences**"   "True Absences (calibration)" 
                             21                              22 
   "True Absences (validation)"    "True Absences (evaluation)" 
                             29                              30 
          "**Pseudo-Absences**" "Pseudo-Absences (calibration)" 
                             31                               1 
 "Pseudo-Absences (validation)"                    "Background" 

$data.plot
dev.off()
png 
  2 

Cross validation 2 (Semibalanus - GBIF + eDNA)

set.seed(seed.val.set)
cv.sc <- bm_CrossValidation(bm.format = myBiomodData_semibalanus_GBIF_eDNA,
                            strategy = 'random',
                            nb.rep = 2,
                            perc = 0.7)


Checking Cross-Validation arguments...

   > Random cross-validation selection
summary(myBiomodData_semibalanus_GBIF_eDNA, calib.lines = cv.sc)
      dataset  run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial <NA> <NA>      1631             0               0      1631
2  evaluation <NA> <NA>       218            18               0         0
3 calibration RUN1  PA1      1142             0            1142        NA
4  validation RUN1  PA1       489             0             489        NA
5 calibration RUN2  PA1      1142             0            1142        NA
6  validation RUN2  PA1       489             0             489        NA
# Save plot to PNG
png("Figures/myBiomodData_validation_semibalanus_combined.png", 
    width = 2000, height = 3000, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_GBIF_eDNA, calib.lines = cv.sc)
$data.vect
 class       : SpatVector 
 geometry    : points 
 dimensions  : 8391, 2  (geometries, attributes)
 extent      : -8.025, 1.975, 49.725, 60.875  (xmin, xmax, ymin, ymax)
 coord. ref. :  
 names       :  resp         dataset
 type        : <num>           <chr>
 values      :    10 Initial dataset
                  10 Initial dataset
                  10 Initial dataset

$data.label
                              9                              10 
                "**Presences**"       "Presences (calibration)" 
                             11                              12 
       "Presences (validation)"        "Presences (evaluation)" 
                             19                              20 
            "**True Absences**"   "True Absences (calibration)" 
                             21                              22 
   "True Absences (validation)"    "True Absences (evaluation)" 
                             29                              30 
          "**Pseudo-Absences**" "Pseudo-Absences (calibration)" 
                             31                               1 
 "Pseudo-Absences (validation)"                    "Background" 

$data.plot
dev.off()
png 
  2 

Cross validation 3 (SB - baseline)

set.seed(seed.val.set)
cv.sbase <- bm_CrossValidation(bm.format = myBiomodData_semibalanus_baseline,
                            strategy = 'random',
                            nb.rep = 2,
                            perc = 0.7)


Checking Cross-Validation arguments...

   > Random cross-validation selection
summary(myBiomodData_semibalanus_baseline, calib.lines = cv.sbase)
      dataset  run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial <NA> <NA>      1611             0               0      1611
2  evaluation <NA> <NA>       218            18               0         0
3 calibration RUN1  PA1      1128             0            1128        NA
4  validation RUN1  PA1       483             0             483        NA
5 calibration RUN2  PA1      1128             0            1128        NA
6  validation RUN2  PA1       483             0             483        NA
# Save plot to PNG
png("Figures/myBiomodData_validation_semibalanus_baseline.png", 
    width = 2000, height = 3000, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_baseline, calib.lines = cv.sbase)
$data.vect
 class       : SpatVector 
 geometry    : points 
 dimensions  : 8291, 2  (geometries, attributes)
 extent      : -8.025, 1.975, 49.725, 60.875  (xmin, xmax, ymin, ymax)
 coord. ref. :  
 names       :  resp         dataset
 type        : <num>           <chr>
 values      :    10 Initial dataset
                  10 Initial dataset
                  10 Initial dataset

$data.label
                              9                              10 
                "**Presences**"       "Presences (calibration)" 
                             11                              12 
       "Presences (validation)"        "Presences (evaluation)" 
                             19                              20 
            "**True Absences**"   "True Absences (calibration)" 
                             21                              22 
   "True Absences (validation)"    "True Absences (evaluation)" 
                             29                              30 
          "**Pseudo-Absences**" "Pseudo-Absences (calibration)" 
                             31                               1 
 "Pseudo-Absences (validation)"                    "Background" 

$data.plot
dev.off()
png 
  2 

Cross validation 4 (Chthamalus - GBIF + GBIF)

set.seed(seed.val.set)
cv.cv <- bm_CrossValidation(bm.format = myBiomodData_chthamalus_GBIF,
                            strategy = 'random',
                            nb.rep = 2,
                            perc = 0.7)


Checking Cross-Validation arguments...

   > Random cross-validation selection
summary(myBiomodData_chthamalus_GBIF, calib.lines = cv.cv)
      dataset  run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial <NA> <NA>       605             0               0       605
2  evaluation <NA> <NA>       145            91               0         0
3 calibration RUN1  PA1       424             0             424        NA
4  validation RUN1  PA1       181             0             181        NA
5 calibration RUN2  PA1       424             0             424        NA
6  validation RUN2  PA1       181             0             181        NA
# Save plot to PNG
png("Figures/myBiomodData_validation_chthamalus_visual.png", 
    width = 2000, height = 3000, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_GBIF, calib.lines = cv.cv)
$data.vect
 class       : SpatVector 
 geometry    : points 
 dimensions  : 3261, 2  (geometries, attributes)
 extent      : -8.025, 1.975, 49.725, 60.825  (xmin, xmax, ymin, ymax)
 coord. ref. :  
 names       :  resp         dataset
 type        : <num>           <chr>
 values      :    10 Initial dataset
                  10 Initial dataset
                  10 Initial dataset

$data.label
                              9                              10 
                "**Presences**"       "Presences (calibration)" 
                             11                              12 
       "Presences (validation)"        "Presences (evaluation)" 
                             19                              20 
            "**True Absences**"   "True Absences (calibration)" 
                             21                              22 
   "True Absences (validation)"    "True Absences (evaluation)" 
                             29                              30 
          "**Pseudo-Absences**" "Pseudo-Absences (calibration)" 
                             31                               1 
 "Pseudo-Absences (validation)"                    "Background" 

$data.plot
dev.off()
png 
  2 

Cross validation 5 (Chthamalus - GBIF + eDNA)

set.seed(seed.val.set)
cv.cc <- bm_CrossValidation(bm.format = myBiomodData_chthamalus_GBIF_eDNA,
                            strategy = 'random',
                            nb.rep = 2,
                            perc = 0.7)


Checking Cross-Validation arguments...

   > Random cross-validation selection
summary(myBiomodData_chthamalus_GBIF_eDNA, calib.lines = cv.cc)
      dataset  run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial <NA> <NA>       589             0               0       589
2  evaluation <NA> <NA>       145            91               0         0
3 calibration RUN1  PA1       412             0             412        NA
4  validation RUN1  PA1       177             0             177        NA
5 calibration RUN2  PA1       412             0             412        NA
6  validation RUN2  PA1       177             0             177        NA
# Save plot to PNG
png("Figures/myBiomodData_validation_chthamalus_combined.png", 
    width = 2000, height = 3000, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_GBIF_eDNA, calib.lines = cv.cc)
$data.vect
 class       : SpatVector 
 geometry    : points 
 dimensions  : 3181, 2  (geometries, attributes)
 extent      : -8.025, 1.975, 49.725, 60.825  (xmin, xmax, ymin, ymax)
 coord. ref. :  
 names       :  resp         dataset
 type        : <num>           <chr>
 values      :    10 Initial dataset
                  10 Initial dataset
                  10 Initial dataset

$data.label
                              9                              10 
                "**Presences**"       "Presences (calibration)" 
                             11                              12 
       "Presences (validation)"        "Presences (evaluation)" 
                             19                              20 
            "**True Absences**"   "True Absences (calibration)" 
                             21                              22 
   "True Absences (validation)"    "True Absences (evaluation)" 
                             29                              30 
          "**Pseudo-Absences**" "Pseudo-Absences (calibration)" 
                             31                               1 
 "Pseudo-Absences (validation)"                    "Background" 

$data.plot
dev.off()
png 
  2 

Cross validation 6 (CM baseline)

set.seed(seed.val.set)
cv.cbase <- bm_CrossValidation(bm.format = myBiomodData_chthamalus_baseline,
                            strategy = 'random',
                            nb.rep = 2,
                            perc = 0.7)


Checking Cross-Validation arguments...

   > Random cross-validation selection
summary(myBiomodData_chthamalus_baseline, calib.lines = cv.cbase)
      dataset  run   PA Presences True_Absences Pseudo_Absences Undefined
1     initial <NA> <NA>       578             0               0       578
2  evaluation <NA> <NA>       145            91               0         0
3 calibration RUN1  PA1       405             0             405        NA
4  validation RUN1  PA1       173             0             173        NA
5 calibration RUN2  PA1       405             0             405        NA
6  validation RUN2  PA1       173             0             173        NA
# Save plot to PNG
png("Figures/myBiomodData_validation_chthamalus_baseline.png", 
    width = 2000, height = 3000, res = 300)  # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_baseline, calib.lines = cv.cbase)
$data.vect
 class       : SpatVector 
 geometry    : points 
 dimensions  : 3126, 2  (geometries, attributes)
 extent      : -8.025, 1.975, 49.725, 60.825  (xmin, xmax, ymin, ymax)
 coord. ref. :  
 names       :  resp         dataset
 type        : <num>           <chr>
 values      :    10 Initial dataset
                  10 Initial dataset
                  10 Initial dataset

$data.label
                              9                              10 
                "**Presences**"       "Presences (calibration)" 
                             11                              12 
       "Presences (validation)"        "Presences (evaluation)" 
                             19                              20 
            "**True Absences**"   "True Absences (calibration)" 
                             21                              22 
   "True Absences (validation)"    "True Absences (evaluation)" 
                             29                              30 
          "**Pseudo-Absences**" "Pseudo-Absences (calibration)" 
                             31                               1 
 "Pseudo-Absences (validation)"                    "Background" 

$data.plot
dev.off()
png 
  2 

Run models

Time to run our models!

TSS_thres <- 0.35
ROC_thres <- 0.65

Model 1 (Semibalanus - GBIF + GBIF)

set.seed(seed.val.set)

# Model single models
myBiomodModelOut_semibalanus_GBIF_GBIF <- BIOMOD_Modeling(
  bm.format = myBiomodData_semibalanus_GBIF,
  models = c('GAM','GLM','RF','MAXENT'),
  modeling.id = "Semibalanus_GBIF_GBIF",
  CV.strategy = 'random', #cross-validation selection strategy
  CV.nb.rep = 2, #number of sets of cross-validation points
  CV.perc = 0.7, #percentage kept for calibration
  OPT.strategy = 'default', 
  var.import = 2,
  metric.eval = c('TSS', 'ROC'),
  seed.val = seed.val.set
)

# check which options can be altered
get_options(myBiomodModelOut_semibalanus_GBIF_GBIF)

# see output
myBiomodModelOut_semibalanus_GBIF_GBIF
# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Individual-GBIF-GBIF/"
figure_path <- "Figures/Semi.bal/Individual-GBIF-GBIF/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_semibalanus_GBIF_GBIF)
var_imp <- get_variables_importance(myBiomodModelOut_semibalanus_GBIF_GBIF)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, dataset = "calibration")

algorithm_cal_plot_SB_CO <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_SB_CO, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, dataset = "evaluation")

algorithm_eval_plot_SB_CO <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_SB_CO, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, group.by = c('algo', 'algo'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)

# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, group.by = c('algo', 'run'))

plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, group.by = c('expl.var', 'algo', 'algo'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, group.by = c('expl.var', 'algo', 'run'))

plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)

# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_semibalanus_GBIF_GBIF)[c(1:3, 12:14)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, 
                      models.chosen = get_built_models(myBiomodModelOut_semibalanus_GBIF_GBIF)[3],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)
set.seed(seed.val.set)

# Model ensemble models
myBiomodEM_semibalanus_GBIF_GBIF <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodModelOut_semibalanus_GBIF_GBIF,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
  metric.select = c('TSS', 'ROC'),
  metric.select.thresh = c(TSS_thres, ROC_thres),
  metric.eval = c('TSS', 'ROC'),
  var.import = 2,
  EMci.alpha = 0.05,
  EMwmean.decay = 'proportional',
  seed.val = seed.val.set
)

myBiomodEM_semibalanus_GBIF_GBIF
# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Ensemble-GBIF-GBIF/"
figure_path <- "Figures/Semi.bal/Ensemble-GBIF-GBIF/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_semibalanus_GBIF_GBIF)
var_imp <- get_variables_importance(myBiomodEM_semibalanus_GBIF_GBIF)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, group.by = 'full.name')

ensemble_cal_plot_SB_CO <- eval_calib$plot + theme_classic() + labs(colour = "Type") + theme(legend.position = "bottom")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(ensemble_cal_plot_SB_CO, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, dataset = "evaluation")

ensemble_eval_plot_SB_CO <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_SB_CO, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, group.by = c('full.name', 'full.name'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Model",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 5, height = 3)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, c('expl.var', 'full.name', 'full.name'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, group.by = c('expl.var', 'algo', 'merged.by.run'))

plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Response curves
models_to_plot <- get_built_models(myBiomodEM_semibalanus_GBIF_GBIF)[c(1, 6, 7)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, 
                      models.chosen = get_built_models(myBiomodEM_semibalanus_GBIF_GBIF)[3],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)

Model 2 (Semibalanus - GBIF + eDNA)

set.seed(seed.val.set)

# Model single models
myBiomodModelOut_semibalanus_GBIF_eDNA <- BIOMOD_Modeling(
  bm.format = myBiomodData_semibalanus_GBIF_eDNA,
  models = c('GAM','GLM','RF','MAXENT'),
  modeling.id = "Semibalanus_GBIF_eDNA",
  CV.strategy = 'random',
  CV.nb.rep = 2,
  CV.perc = 0.7,
  OPT.strategy = 'default',
  var.import = 2,
  metric.eval = c('TSS', 'ROC'),
  seed.val = seed.val.set
)

get_options(myBiomodModelOut_semibalanus_GBIF_eDNA)

myBiomodModelOut_semibalanus_GBIF_eDNA
# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Individual-GBIF-eDNA/"
figure_path <- "Figures/Semi.bal/Individual-GBIF-eDNA/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_semibalanus_GBIF_eDNA)
var_imp <- get_variables_importance(myBiomodModelOut_semibalanus_GBIF_eDNA)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, dataset = "calibration")

algorithm_cal_plot_SB_CE <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_SB_CE, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, dataset = "evaluation")

algorithm_eval_plot_SB_CE <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_SB_CE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, group.by = c('algo', 'algo'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)

# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, group.by = c('algo', 'run'))

plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'algo'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'run'))

plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)

# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_semibalanus_GBIF_eDNA)[c(1:3, 12:14)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, 
                      models.chosen = get_built_models(myBiomodModelOut_semibalanus_GBIF_eDNA)[3],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)
set.seed(seed.val.set)

# Model ensemble models
myBiomodEM_semibalanus_GBIF_eDNA <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodModelOut_semibalanus_GBIF_eDNA,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
  metric.select = c('TSS', 'ROC'),
  metric.select.thresh = c(TSS_thres, ROC_thres),
  metric.eval = c('TSS', 'ROC'),
  var.import = 2,
  EMci.alpha = 0.05,
  EMwmean.decay = 'proportional',
  seed.val = seed.val.set
)

myBiomodEM_semibalanus_GBIF_eDNA
# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Ensemble-GBIF-eDNA/"
figure_path <- "Figures/Semi.bal/Ensemble-GBIF-eDNA/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_semibalanus_GBIF_eDNA)
var_imp <- get_variables_importance(myBiomodEM_semibalanus_GBIF_eDNA)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, group.by = 'full.name')

plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, dataset = "evaluation")

ensemble_eval_plot_SB_CE <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_SB_CE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, group.by = c('full.name', 'full.name'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 10, height = 5)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, c('expl.var', 'full.name', 'full.name'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'merged.by.run'))

plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Response curves
models_to_plot <- models_to_plot <- get_built_models(myBiomodEM_semibalanus_GBIF_eDNA)[c(1, 6, 7)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, 
                      models.chosen = get_built_models(myBiomodEM_semibalanus_GBIF_eDNA)[7],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)

Model 3 (SB baseline)

set.seed(seed.val.set)

# Model single models
myBiomodModelOut_semibalanus_baseline <- BIOMOD_Modeling(
  bm.format = myBiomodData_semibalanus_baseline,
  models = c('GAM','GLM','RF','MAXENT'),
  modeling.id = "Semibalanus_baseline",
  CV.strategy = 'random',
  CV.nb.rep = 2,
  CV.perc = 0.7,
  OPT.strategy = 'default',
  var.import = 2,
  metric.eval = c('TSS', 'ROC'),
  seed.val = seed.val.set
)

get_options(myBiomodModelOut_semibalanus_baseline)

myBiomodModelOut_semibalanus_baseline
# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Individual-baseline/"
figure_path <- "Figures/Semi.bal/Individual-baseline/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_semibalanus_baseline)
var_imp <- get_variables_importance(myBiomodModelOut_semibalanus_baseline)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_baseline, dataset = "calibration")

algorithm_cal_plot_SB_BASE <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_SB_BASE, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_baseline, dataset = "evaluation")

algorithm_eval_plot_SB_BASE <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_SB_BASE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_baseline, group.by = c('algo', 'algo'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)

# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_baseline, group.by = c('algo', 'run'))

plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_baseline, group.by = c('expl.var', 'algo', 'algo'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_baseline, group.by = c('expl.var', 'algo', 'run'))

plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)

# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_semibalanus_baseline)[c(1:3, 12:14)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_baseline, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_baseline, 
                      models.chosen = get_built_models(myBiomodModelOut_semibalanus_baseline)[3],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)
set.seed(seed.val.set)

# Model ensemble models
myBiomodEM_semibalanus_baseline <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodModelOut_semibalanus_baseline,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
  metric.select = c('TSS', 'ROC'),
  metric.select.thresh = c(TSS_thres, ROC_thres),
  metric.eval = c('TSS', 'ROC'),
  var.import = 2,
  EMci.alpha = 0.05,
  EMwmean.decay = 'proportional',
  seed.val = seed.val.set
)

myBiomodEM_semibalanus_baseline
# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Ensemble-baseline/"
figure_path <- "Figures/Semi.bal/Ensemble-baseline/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_semibalanus_baseline)
var_imp <- get_variables_importance(myBiomodEM_semibalanus_baseline)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_baseline, group.by = 'full.name')

plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_baseline, dataset = "evaluation")

ensemble_eval_plot_SB_BASE <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_SB_BASE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_semibalanus_baseline, group.by = c('full.name', 'full.name'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 10, height = 5)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_baseline, c('expl.var', 'full.name', 'full.name'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_baseline, group.by = c('expl.var', 'algo', 'merged.by.run'))

plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Response curves
models_to_plot <- models_to_plot <- get_built_models(myBiomodEM_semibalanus_baseline)[c(1, 6, 7)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_baseline, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_baseline, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_baseline, 
                      models.chosen = get_built_models(myBiomodEM_semibalanus_baseline)[7],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)

Semibalanus assessment figures

SB_assessment_plot <- cowplot::plot_grid(
  ensemble_eval_plot_SB_CO,
  ensemble_eval_plot_SB_CE,
  algorithm_cal_plot_SB_CO,
  algorithm_cal_plot_SB_CE,
  algorithm_eval_plot_SB_CO,
  algorithm_eval_plot_SB_CE,
  ncol = 2, 
  labels = c("a) Ensemble (GBIF + GBIF)", 
             "b) Ensemble (GBIF-eDNA)", 
             "c) Algorithms (Calibration, GBIF + GBIF)", 
             "d) Algorithms (Calibration, GBIF-eDNA)",
             "e) Algorithms (Evaluation, GBIF + GBIF)",
             "f) Algorithms (Evaluation, GBIF-eDNA)"), 
  align = "v",
  label_x = 0,   # left align labels (0 = far left, 1 = far right)
  label_y = 1,   # push labels to the very top
  hjust = 0,     # left-justify text relative to x-position
  vjust = 1,      # align with top edge
  label_size = 10 
)

SB_assessment_plot

# Save the plot
ggsave(SB_assessment_plot, filename = "Figures/SB_assessment_plot.png", width = 12, height = 15, dpi = 300)

Model 4 (Chthamalus - GBIF + GBIF)

set.seed(seed.val.set)

# Model single models
myBiomodModelOut_chthamalus_GBIF <- BIOMOD_Modeling(
  bm.format = myBiomodData_chthamalus_GBIF,
  models = c('GAM','GLM','RF','MAXENT'),
  modeling.id = "chthamalus_GBIF_GBIF",
  CV.strategy = 'random',
  CV.nb.rep = 2,
  CV.perc = 0.7,
  OPT.strategy = 'default',
  var.import = 2,
  metric.eval = c('TSS', 'ROC'),
  seed.val = seed.val.set
)

get_options(myBiomodModelOut_chthamalus_GBIF)

myBiomodModelOut_chthamalus_GBIF
# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Individual-GBIF-GBIF/"
figure_path <- "Figures/Chtham.mont/Individual-GBIF-GBIF/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_chthamalus_GBIF)
var_imp <- get_variables_importance(myBiomodModelOut_chthamalus_GBIF)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_GBIF, dataset = "calibration")

algorithm_cal_plot_CM_CO <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_CM_CO, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_GBIF, dataset = "evaluation")

algorithm_eval_plot_CM_CO <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_CM_CO, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF, group.by = c('algo', 'algo'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)

# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF, group.by = c('algo', 'run'))

plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF, group.by = c('expl.var', 'algo', 'algo'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF, group.by = c('expl.var', 'algo', 'run'))

plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)

# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_chthamalus_GBIF)[c(1:3, 12:14)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF, 
                      models.chosen = get_built_models(myBiomodModelOut_chthamalus_GBIF)[3],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)
set.seed(seed.val.set)

# Model ensemble models (takes around 30 mins)
myBiomodEM_chthamalus_GBIF_GBIF <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodModelOut_chthamalus_GBIF,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
  metric.select = c('TSS', 'ROC'),
  metric.select.thresh = c(TSS_thres, ROC_thres),
  metric.eval = c('TSS', 'ROC'),
  var.import = 2,
  EMci.alpha = 0.05,
  EMwmean.decay = 'proportional',
  seed.val = seed.val.set
)

myBiomodEM_chthamalus_GBIF_GBIF
# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Ensemble-GBIF-GBIF/"
figure_path <- "Figures/Chtham.mont/Ensemble-GBIF-GBIF/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_chthamalus_GBIF_GBIF)
var_imp <- get_variables_importance(myBiomodEM_chthamalus_GBIF_GBIF)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, group.by = 'full.name')

plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 10, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, dataset = "evaluation")

ensemble_eval_plot_CM_CO <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_CM_CO, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, group.by = c('full.name', 'full.name'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, c('expl.var', 'full.name', 'full.name'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Type")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, group.by = c('expl.var', 'algo', 'merged.by.run'))

plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Type")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Response curves
models_to_plot <- get_built_models(myBiomodEM_chthamalus_GBIF_GBIF)[c(1, 6, 7)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, 
                      models.chosen = get_built_models(myBiomodEM_chthamalus_GBIF_GBIF)[7],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)

Model 5 (Chthamalus - GBIF + eDNA)

set.seed(seed.val.set)

# Model single models
myBiomodModelOut_chthamalus_GBIF_eDNA <- BIOMOD_Modeling(
  bm.format = myBiomodData_chthamalus_GBIF_eDNA,
  models = c('GAM','GLM','RF','MAXENT'),
  modeling.id = "chthamalus_GBIF_eDNA",
  CV.strategy = 'random',
  CV.nb.rep = 2,
  CV.perc = 0.7,
  OPT.strategy = 'default',
  var.import = 2,
  metric.eval = c('TSS', 'ROC'),
  seed.val = seed.val.set
)

get_options(myBiomodModelOut_chthamalus_GBIF_eDNA)

myBiomodModelOut_chthamalus_GBIF_eDNA
# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Individual-GBIF-eDNA/"
figure_path <- "Figures/Chtham.mont/Individual-GBIF-eDNA/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_chthamalus_GBIF_eDNA)
var_imp <- get_variables_importance(myBiomodModelOut_chthamalus_GBIF_eDNA)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, dataset = "calibration")

algorithm_cal_plot_CM_CE <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_CM_CE, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, dataset = "evaluation")

algorithm_eval_plot_CM_CE <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_CM_CE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, group.by = c('algo', 'algo'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)

# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, group.by = c('algo', 'run'))

plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'algo'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'run'))

plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)

# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_chthamalus_GBIF_eDNA)[c(1:3, 12:14)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, 
                      models.chosen = get_built_models(myBiomodModelOut_chthamalus_GBIF_eDNA)[3],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)
set.seed(seed.val.set)

# Model ensemble models (takes around 30 mins)
myBiomodEM_chthamalus_GBIF_eDNA <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodModelOut_chthamalus_GBIF_eDNA,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
  metric.select = c('TSS', 'ROC'),
  metric.select.thresh = c(TSS_thres, ROC_thres),
  metric.eval = c('TSS', 'ROC'),
  var.import = 2,
  EMci.alpha = 0.05,
  EMwmean.decay = 'proportional',
  seed.val = seed.val.set
)

myBiomodEM_chthamalus_GBIF_eDNA
# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Ensemble-GBIF-eDNA/"
figure_path <- "Figures/Chtham.mont/Ensemble-GBIF-eDNA/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_chthamalus_GBIF_eDNA)
var_imp <- get_variables_importance(myBiomodEM_chthamalus_GBIF_eDNA)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, group.by = 'full.name')

plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 10, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, dataset = "evaluation")

ensemble_eval_plot_CM_CE <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_CM_CE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, group.by = c('full.name', 'full.name'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 10, height = 5)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, c('expl.var', 'full.name', 'full.name'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Type")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'merged.by.run'))

plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Type")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Response curves
models_to_plot <- get_built_models(myBiomodEM_chthamalus_GBIF_eDNA)[c(1, 6, 7)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, 
                      models.chosen = get_built_models(myBiomodEM_chthamalus_GBIF_eDNA)[7],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)

Model 6 (CM - baseline)

set.seed(seed.val.set)

# Model single models
myBiomodModelOut_chthamalus_baseline <- BIOMOD_Modeling(
  bm.format = myBiomodData_chthamalus_baseline,
  models = c('GAM','GLM','RF','MAXENT'),
  modeling.id = "chthamalus_baseline",
  CV.strategy = 'random',
  CV.nb.rep = 2,
  CV.perc = 0.7,
  OPT.strategy = 'default',
  var.import = 2,
  metric.eval = c('TSS', 'ROC'),
  seed.val = seed.val.set
)

get_options(myBiomodModelOut_chthamalus_baseline)

myBiomodModelOut_chthamalus_baseline
# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Individual-baseline/"
figure_path <- "Figures/Chtham.mont/Individual-baseline/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_chthamalus_baseline)
var_imp <- get_variables_importance(myBiomodModelOut_chthamalus_baseline)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_baseline, dataset = "calibration")

algorithm_cal_plot_cm_BASE <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_cm_BASE, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_baseline, dataset = "evaluation")

algorithm_eval_plot_cm_BASE <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_cm_BASE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_baseline, group.by = c('algo', 'algo'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)

# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_baseline, group.by = c('algo', 'run'))

plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_baseline, group.by = c('expl.var', 'algo', 'algo'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_baseline, group.by = c('expl.var', 'algo', 'run'))

plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)

# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_chthamalus_baseline)[c(1:3, 12:14)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_baseline, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_baseline, 
                      models.chosen = get_built_models(myBiomodModelOut_chthamalus_baseline)[3],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)
set.seed(seed.val.set)

# Model ensemble models
myBiomodEM_chthamalus_baseline <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodModelOut_chthamalus_baseline,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
  metric.select = c('TSS', 'ROC'),
  metric.select.thresh = c(TSS_thres, ROC_thres),
  metric.eval = c('TSS', 'ROC'),
  var.import = 2,
  EMci.alpha = 0.05,
  EMwmean.decay = 'proportional',
  seed.val = seed.val.set
)

myBiomodEM_chthamalus_baseline
# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Ensemble-baseline/"
figure_path <- "Figures/Chtham.mont/Ensemble-baseline/"

# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)

# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_chthamalus_baseline)
var_imp <- get_variables_importance(myBiomodEM_chthamalus_baseline)

write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)

# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_baseline, group.by = 'full.name')

plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab

write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)

# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_baseline, dataset = "evaluation")

ensemble_eval_plot_cm_BASE <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_cm_BASE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)

# calibration boxplots - overall 
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_chthamalus_baseline, group.by = c('full.name', 'full.name'))

plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
                                                y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 10, height = 5)

# Variable importance boxplots - overall 
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_baseline, c('expl.var', 'full.name', 'full.name'))

plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_baseline, group.by = c('expl.var', 'algo', 'merged.by.run'))

plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)

# Response curves
models_to_plot <- models_to_plot <- get_built_models(myBiomodEM_chthamalus_baseline)[c(1, 6, 7)]

# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_baseline, 
                      models.chosen = models_to_plot,
                      fixed.var = 'median')

tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)

# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_baseline, 
                      models.chosen = models_to_plot,
                      fixed.var = 'min')

tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)

# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_baseline, 
                      models.chosen = get_built_models(myBiomodEM_chthamalus_baseline)[7],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)

Chthamalus assessment figures

CM_assessment_plot <- cowplot::plot_grid(
  ensemble_eval_plot_CM_CO,
  ensemble_eval_plot_CM_CE,
  algorithm_cal_plot_CM_CO,
  algorithm_cal_plot_CM_CE,
  algorithm_eval_plot_CM_CO,
  algorithm_eval_plot_CM_CE,
  ncol = 2, 
  labels = c(
    "a) Ensemble (GBIF + GBIF)", 
    "b) Ensemble (GBIF-eDNA)", 
    "c) Algorithms (Calibration, GBIF + GBIF)", 
    "d) Algorithms (Calibration, GBIF-eDNA)",
    "e) Algorithms (Evaluation, GBIF + GBIF)",
    "f) Algorithms (Evaluation, GBIF-eDNA)"
  ),
  align = "v",
  label_x = 0,   # left align labels (0 = far left, 1 = far right)
  label_y = 1,   # push labels to the very top
  hjust = 0,     # left-justify text relative to x-position
  vjust = 1,      # align with top edge
  label_size = 10 
)


CM_assessment_plot

# Save the plot
ggsave(CM_assessment_plot, filename = "Figures/CM_assessment_plot.png", width = 12, height = 15, dpi = 300)

Current projections

We are going to run projections and mask our rocky substrate layer over projections. This ensures that any areas without rocky habitats won’t be included in our final maps.

Projection 1 (Semibalanus - GBIF + GBIF)

#just using mean models from now on

myBiomodEMProj_semibalanus_visual_GBIF_GBIF <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM_semibalanus_GBIF_GBIF,
  proj.name = 'CurrentEM.semibalanus.GBIF',
  new.env = enviro_vars_current,
  models.chosen = c(
    'Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo', 
    'Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo')
  #metric.binary = 'all',
  #metric.filter = 'all'
)

myBiomodEMProj_semibalanus_visual_GBIF_GBIF

# visualise
proj_semibalanus_visual <- get_predictions(myBiomodEMProj_semibalanus_visual_GBIF_GBIF) # returns a rasterLayer
plot(proj_semibalanus_visual)

# after rocky mask
proj_semibalanus_visual_masked <- mask(proj_semibalanus_visual, expanded_rock)
plot(proj_semibalanus_visual_masked)

# Choose the first layer
r_layer <- proj_semibalanus_visual_masked[[1]] 

# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
  filter(!(x > -8.2 & x < -5.3 &
           y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland 
  filter(!(x > -8.0 & x < -6.0 &
           y > 55.1 & y < 55.6))

colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_SB_visual <- r_df

# plot
semibalanus_visual_current_plot_TSS <- ggplot() +
  
 geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
  scale_fill_viridis_c(option = "viridis", 
                       na.value = "transparent", 
                       limits = c(0, 1)) +
  coord_sf(
    xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  labs(fill = NULL) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),   # ~80% of plotting height
      barwidth  = unit(0.02, "npc"),  # slim vertical bar
      title.position = "top"
    )
  ) +
  theme(
    legend.position = "right",        # place bar to the right
    axis.title.x = element_blank(),   # remove x-axis title
    axis.title.y = element_blank(),   # remove y-axis title
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

semibalanus_visual_current_plot_TSS

Projection 2 (Semibalanus - GBIF + eDNA)

myBiomodEMProj_semibalanus_GBIF_eDNA <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM_semibalanus_GBIF_eDNA,
  proj.name = 'CurrentEM.semibalanus.GBIF.eDNA',
  new.env = enviro_vars_current,
  models.chosen = c(
    'Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo', 
    'Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo')
  #metric.binary = 'all',
  #metric.filter = 'all'
)

myBiomodEMProj_semibalanus_GBIF_eDNA

# visualise
proj_semibalanus_combined <- get_predictions(myBiomodEMProj_semibalanus_GBIF_eDNA) # returns a rasterLayer
plot(proj_semibalanus_combined)

# after rocky mask
proj_semibalanus_combined_masked <- mask(proj_semibalanus_combined, expanded_rock)
plot(proj_semibalanus_combined_masked)

# Choose the first layer
r_layer <- proj_semibalanus_combined_masked[[1]] 

# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
  filter(!(x > -8.2 & x < -5.3 &
           y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland 
  filter(!(x > -8.0 & x < -6.0 &
           y > 55.1 & y < 55.6))

colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_SB_combined <- r_df

# plot
semibalanus_combined_current_plot_TSS <- ggplot() +
  
 geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
  scale_fill_viridis_c(option = "viridis", 
                       na.value = "transparent", 
                       limits = c(0, 1)) +
  labs(fill = NULL) + 
  coord_sf(
    xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),   # ~80% of plotting height
      barwidth  = unit(0.02, "npc")   # slim vertical bar
    )
  ) +
  theme(
    legend.position = "right",        # put the colorbar on the right
    axis.title.x = element_blank(),   # remove x-axis title
    axis.title.y = element_blank(),   # remove y-axis title
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

semibalanus_combined_current_plot_TSS

Projection 3 (Chthamalus - GBIF + GBIF)

myBiomodEMProj_chthamalus_GBIF_GBIF <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM_chthamalus_GBIF_GBIF,
  proj.name = 'CurrentEM.chthamalus.GBIF',
  new.env = enviro_vars_current,
  models.chosen = c(
    'Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo', 
    'Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo')
  #metric.binary = 'all',
  #metric.filter = 'all'
)

myBiomodEMProj_chthamalus_GBIF_GBIF

# visualise
proj_chthamalus_visual <- get_predictions(myBiomodEMProj_chthamalus_GBIF_GBIF) # returns a rasterLayer
plot(proj_chthamalus_visual)

# after rocky mask
proj_chthamalus_visual_masked <- mask(proj_chthamalus_visual, expanded_rock)
plot(proj_chthamalus_visual_masked)

# Choose the first layer
r_layer <- proj_chthamalus_visual_masked[[1]] 

# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
  filter(!(x > -8.2 & x < -5.3 &
           y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland 
  filter(!(x > -8.0 & x < -6.0 &
           y > 55.1 & y < 55.6))

colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_CM_visual <- r_df

# plot
chthamalus_visual_current_plot_TSS <- ggplot() +

  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
  scale_fill_viridis_c(option = "viridis", 
                       na.value = "transparent", 
                       limits = c(0, 1)) +
  labs(fill = NULL) + 
  coord_sf(
       xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),   # ~80% of plotting height
      barwidth  = unit(0.02, "npc")   # slim vertical bar
    )
  ) +
  theme(
    legend.position = "right",        # show the colorbar on the right
    axis.title.x = element_blank(),   # remove x-axis title
    axis.title.y = element_blank(),   # remove y-axis title
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

chthamalus_visual_current_plot_TSS

Projection 4 (Chthamalus - GBIF + eDNA)

myBiomodEMProj_chthamalus_GBIF_eDNA <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM_chthamalus_GBIF_eDNA,
  proj.name = 'CurrentEM.chthamalus.GBIF.eDNA',
  new.env = enviro_vars_current,
  models.chosen = c(
    'Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo', 
    'Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo')
  #metric.binary = 'all',
  #metric.filter = 'all'
)

myBiomodEMProj_chthamalus_GBIF_eDNA

# visualise
proj_chthamalus_combined <- get_predictions(myBiomodEMProj_chthamalus_GBIF_eDNA) # returns a rasterLayer
plot(proj_chthamalus_combined)

# after rocky mask
proj_chthamalus_combined_masked <- mask(proj_chthamalus_combined, expanded_rock)
plot(proj_chthamalus_combined_masked)

# Choose the first layer
r_layer <- proj_chthamalus_combined_masked[[1]] 

# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
  filter(!(x > -8.2 & x < -5.3 &
           y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland 
  filter(!(x > -8.0 & x < -6.0 &
           y > 55.1 & y < 55.6))

colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_CM_combined <- r_df

#plot
chthamalus_combined_current_plot_TSS <- ggplot() +

  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
  scale_fill_viridis_c(option = "viridis", 
                       na.value = "transparent", 
                       limits = c(0, 1)) +
  labs(fill = NULL) +   # 🚨 remove legend title
  coord_sf(
        xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),   # ~80% of plot height
      barwidth  = unit(0.02, "npc")   # slim vertical bar
    )
  ) +
  theme(
    legend.position = "right",        # show colorbar on the right
    axis.title.x = element_blank(),   # remove x-axis title
    axis.title.y = element_blank(),   # remove y-axis title
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

chthamalus_combined_current_plot_TSS

Difference maps

#join the data together
colnames(r_df_SB_visual) <- c("x", "y", "suitability_visual", "suitability_divide_visual")
colnames(r_df_SB_combined) <- c("x", "y", "suitability_combined", "suitability_divide_combined")

r_df_SB_diff <- left_join(r_df_SB_visual, r_df_SB_combined)
r_df_SB_diff$diff <- r_df_SB_diff$suitability_divide_combined - r_df_SB_diff$suitability_divide_visual

mean(r_df_SB_diff$diff)
[1] 0.005617137
# map
SB_diff_plot <- ggplot() +
  
  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df_SB_diff, aes(x = x, y = y, fill = diff)) +
  
  scale_fill_gradient2(
    low = "#F8333C",      # negative values
    mid = "white",    # 0
    high = "#0273CA",    # positive values
    midpoint = 0,     # center the scale at 0
    na.value = "transparent",
    breaks = c(min(r_df_SB_diff$diff, na.rm = TRUE), 0, max(r_df_SB_diff$diff, na.rm = TRUE)),
  labels = c("Loss", "No change", "Gain")
  ) +
  
  labs(fill = NULL) +
  coord_sf(
    xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),
      barwidth  = unit(0.02, "npc")
    )
  ) +
  theme(
    legend.position = "right",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

SB_diff_plot

#join the data together
colnames(r_df_CM_visual) <- c("x", "y", "suitability_visual", "suitability_divide_visual")
colnames(r_df_CM_combined) <- c("x", "y", "suitability_combined", "suitability_divide_combined")

r_df_CM_diff <- left_join(r_df_CM_visual, r_df_CM_combined)
r_df_CM_diff$diff <- r_df_CM_diff$suitability_divide_combined - r_df_CM_diff$suitability_divide_visual

mean(r_df_CM_diff$diff)
[1] 0.0125705
# map
CM_diff_plot <- ggplot() +
  
  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df_CM_diff, aes(x = x, y = y, fill = diff)) +
  
  scale_fill_gradient2(
    low = "#F8333C",      # negative values
    mid = "white",    # 0
    high = "#0273CA",    # positive values
    midpoint = 0,     # center the scale at 0
    na.value = "transparent",
    breaks = c(min(r_df_CM_diff$diff, na.rm = TRUE), 0, max(r_df_CM_diff$diff, na.rm = TRUE)),
  labels = c("Loss", "No change", "Gain")
  ) +
  
  labs(fill = NULL) +
  coord_sf(
    xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),
      barwidth  = unit(0.02, "npc")
    )
  ) +
  theme(
    legend.position = "right",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

CM_diff_plot

Joint maps

Let’s plot all our current projections together into one plot.

# Left-aligned species labels
sb_label <- ggdraw() + 
  draw_label("Semibalanus balanoides", 
             fontface = "italic", 
             size = 14,
             hjust = 0,      # left-aligned
             x = 0.02)       # move slightly from edge

cm_label <- ggdraw() + 
  draw_label("Chthamalus montagui", 
             fontface = "italic", 
             size = 14,
             hjust = 0, 
             x = 0.02)

#top row
SB_row <- plot_grid(
  semibalanus_visual_current_plot_TSS,
  semibalanus_combined_current_plot_TSS,
  SB_diff_plot,
  ncol = 3,
  labels = c("a) GBIF baseline + GBIF additional", 
             "b) GBIF baseline + eDNA", 
             "c) Difference"),
  align = "hv",
    label_x = 0,   # x = 0 anchors to left edge
  label_y = 1,   # y = 1 anchors to top edge
  hjust = 0,     # left-justify long text
  vjust = 1     # top-justify vertically
)

#bottom row
CM_row <- plot_grid(
  chthamalus_visual_current_plot_TSS,
  chthamalus_combined_current_plot_TSS, 
  CM_diff_plot,
  ncol = 3,
  labels = c("d) GBIF baseline + GBIF additional", 
             "e) GBIF baseline + eDNA", 
             "f) Difference"),
  align = "hv",
  label_x = 0,   # x = 0 anchors to left edge
  label_y = 1,   # y = 1 anchors to top edge
  hjust = 0,     # left-justify long text
  vjust = 1     # top-justify vertically
)


# Combine rows with labels (same layout as before)
current_plot_2 <- plot_grid(
  sb_label, SB_row,
  cm_label, CM_row,
  ncol = 1,
  rel_heights = c(0.08, 1, 0.08, 1)
)

current_plot_2

# Save the plot
ggsave(current_plot_2, filename = "Figures/current_plot_2.png", width = 15, height = 13, dpi = 300)

Forecasting change

Forecast 1 (Semibalanus - GBIF + GBIF)

myBiomodEMProjFuture_semibalanus_GBIF_GBIF <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM_semibalanus_GBIF_GBIF,
  proj.name = 'FutureEM.semibalanus.visual',
  new.env = enviro_vars_future,
  models.chosen = c(
    'Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
    "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo"
  )
  #metric.binary = 'all',
  #metric.filter = 'all'
)

-=-=-=-=-=-=-=-=-=-=-=-= Do Ensemble Models Projection -=-=-=-=-=-=-=-=-=-=-=-=

Creating suitable Workdir...

-=-=-=-=-=-=-=-=-=-=-=-=-= Do Single Models Projection -=-=-=-=-=-=-=-=-=-=-=-=-=

    > Projecting Semibalanus.balanoides_PA1_RUN1_RF ...
    > Projecting Semibalanus.balanoides_PA1_RUN1_MAXENT ...
    > Projecting Semibalanus.balanoides_PA1_RUN2_RF ...
    > Projecting Semibalanus.balanoides_PA1_RUN2_MAXENT ...
    > Projecting Semibalanus.balanoides_PA1_RUN1_GAM ...
    > Projecting Semibalanus.balanoides_PA1_RUN1_GLM ...
    > Projecting Semibalanus.balanoides_PA1_RUN2_GAM ...
    > Projecting Semibalanus.balanoides_PA1_RUN2_GLM ...

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

    > Projecting Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo ...
    > Projecting Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo ...
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
myBiomodEMProjFuture_semibalanus_GBIF_GBIF

-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.projection.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=

Projection directory : ./Semibalanus.balanoides/FutureEM.semibalanus.visual


sp.name : Semibalanus.balanoides

expl.var.names : ocean_temp ph wave_fetch


modeling.id : Semibalanus_GBIF_GBIF ( 
./Semibalanus.balanoides/Semibalanus.balanoides.Semibalanus_GBIF_GBIF.ensemble.models.out 
)

models.projected : 
Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo, Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
plot(myBiomodEMProjFuture_semibalanus_GBIF_GBIF)

# visualise
future_semibalanus_visual <- get_predictions(myBiomodEMProjFuture_semibalanus_GBIF_GBIF) # returns a rasterLayer
plot(future_semibalanus_visual)

# after rocky mask
future_semibalanus_visual_masked <- mask(future_semibalanus_visual, expanded_rock)
plot(future_semibalanus_visual_masked)

# Choose the first layer
r_layer <- future_semibalanus_visual_masked[[1]] 

# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
  filter(!(x > -8.2 & x < -5.3 &
           y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland 
  filter(!(x > -8.0 & x < -6.0 &
           y > 55.1 & y < 55.6))

colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_SM_visual_future <- r_df

#plot
semibalanus_visual_future_plot <- ggplot() +

  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
  scale_fill_viridis_c(option = "viridis", 
                       na.value = "transparent", 
                       limits = c(0, 1)) +
  labs(fill = NULL) +   # remove legend title
  coord_sf(
        xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),   # ~80% of plot height
      barwidth  = unit(0.02, "npc")   # slim vertical bar
    )
  ) +
  theme(
    legend.position = "right",        # show colorbar on the right
    axis.title.x = element_blank(),   # remove x-axis title
    axis.title.y = element_blank(),   # remove y-axis title
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

semibalanus_visual_future_plot

Forecast 2 (Semibalanus - GBIF + eDNA)

myBiomodEMProjFuture_semibalanus_GBIF_eDNA <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM_semibalanus_GBIF_eDNA,
  proj.name = 'FutureEM.semibalanus.combined',
  new.env = enviro_vars_future,
  models.chosen = c(
    'Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
    "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo"
  )
  #metric.binary = 'all',
  #metric.filter = 'all'
)

-=-=-=-=-=-=-=-=-=-=-=-= Do Ensemble Models Projection -=-=-=-=-=-=-=-=-=-=-=-=

Creating suitable Workdir...

-=-=-=-=-=-=-=-=-=-=-=-=-= Do Single Models Projection -=-=-=-=-=-=-=-=-=-=-=-=-=

    > Projecting Semibalanus.balanoides_PA1_RUN1_RF ...
    > Projecting Semibalanus.balanoides_PA1_RUN1_MAXENT ...
    > Projecting Semibalanus.balanoides_PA1_RUN2_RF ...
    > Projecting Semibalanus.balanoides_PA1_RUN2_MAXENT ...
    > Projecting Semibalanus.balanoides_PA1_RUN1_GAM ...
    > Projecting Semibalanus.balanoides_PA1_RUN1_GLM ...
    > Projecting Semibalanus.balanoides_PA1_RUN2_GAM ...
    > Projecting Semibalanus.balanoides_PA1_RUN2_GLM ...

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

    > Projecting Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo ...
    > Projecting Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo ...
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
myBiomodEMProjFuture_semibalanus_GBIF_eDNA

-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.projection.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=

Projection directory : ./Semibalanus.balanoides/FutureEM.semibalanus.combined


sp.name : Semibalanus.balanoides

expl.var.names : ocean_temp ph wave_fetch


modeling.id : Semibalanus_GBIF_eDNA ( 
./Semibalanus.balanoides/Semibalanus.balanoides.Semibalanus_GBIF_eDNA.ensemble.models.out 
)

models.projected : 
Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo, Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
plot(myBiomodEMProjFuture_semibalanus_GBIF_eDNA)

# visualise
future_semibalanus_combined <- get_predictions(myBiomodEMProjFuture_semibalanus_GBIF_eDNA) # returns a rasterLayer
plot(future_semibalanus_combined)

# after rocky mask
future_semibalanus_combined_masked <- mask(future_semibalanus_combined, expanded_rock)
plot(future_semibalanus_combined_masked)

# Choose the first layer
r_layer <- future_semibalanus_combined_masked[[1]] 

# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
  filter(!(x > -8.2 & x < -5.3 &
           y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland 
  filter(!(x > -8.0 & x < -6.0 &
           y > 55.1 & y < 55.6))

colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_future_SB_combined_masked <- r_df

#plot
semibalanus_combined_future_plot <- ggplot() +

  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
  scale_fill_viridis_c(option = "viridis", 
                       na.value = "transparent", 
                       limits = c(0, 1)) +
  labs(fill = NULL) +   # remove legend title
  coord_sf(
        xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),   # ~80% of plot height
      barwidth  = unit(0.02, "npc")   # slim vertical bar
    )
  ) +
  theme(
    legend.position = "right",        # show colorbar on the right
    axis.title.x = element_blank(),   # remove x-axis title
    axis.title.y = element_blank(),   # remove y-axis title
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

semibalanus_combined_future_plot

Forecast 3 (Chthamalus - GBIF + GBIF)

myBiomodEMProjFuture_chthamalus_GBIF_GBIF<- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM_chthamalus_GBIF_GBIF,
  proj.name = 'FutureEM.chthamalus.visual',
  new.env = enviro_vars_future,
  models.chosen = c(
    'Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
    "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo"
  ),
  metric.binary = 'all',
  metric.filter = 'all'
)

-=-=-=-=-=-=-=-=-=-=-=-= Do Ensemble Models Projection -=-=-=-=-=-=-=-=-=-=-=-=

Creating suitable Workdir...

-=-=-=-=-=-=-=-=-=-=-=-=-= Do Single Models Projection -=-=-=-=-=-=-=-=-=-=-=-=-=

    > Projecting Chthamalus.montagui_PA1_RUN1_GAM ...
    > Projecting Chthamalus.montagui_PA1_RUN1_GLM ...
    > Projecting Chthamalus.montagui_PA1_RUN1_RF ...
    > Projecting Chthamalus.montagui_PA1_RUN1_MAXENT ...
    > Projecting Chthamalus.montagui_PA1_RUN2_GAM ...
    > Projecting Chthamalus.montagui_PA1_RUN2_GLM ...
    > Projecting Chthamalus.montagui_PA1_RUN2_RF ...
    > Projecting Chthamalus.montagui_PA1_RUN2_MAXENT ...

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

    > Projecting Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo ...
    > Projecting Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo ...

    > Building TSS binaries / filtered
    > Building ROC binaries / filtered

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
myBiomodEMProjFuture_chthamalus_GBIF_GBIF

-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.projection.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=

Projection directory : ./Chthamalus.montagui/FutureEM.chthamalus.visual


sp.name : Chthamalus.montagui

expl.var.names : ocean_temp ph wave_fetch


modeling.id : chthamalus_GBIF_GBIF ( 
./Chthamalus.montagui/Chthamalus.montagui.chthamalus_GBIF_GBIF.ensemble.models.out 
)

models.projected : 
Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo, Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo

available binary projection : TSS, ROC

available filtered projection : TSS, ROC

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
plot(myBiomodEMProjFuture_chthamalus_GBIF_GBIF)

# visualise
future_chthamalus_visual <- get_predictions(myBiomodEMProjFuture_chthamalus_GBIF_GBIF) # returns a rasterLayer
plot(future_chthamalus_visual)

# after rocky mask
future_chthamalus_visual_masked <- mask(future_chthamalus_visual, expanded_rock)
plot(future_chthamalus_visual_masked)

# Choose the first layer
r_layer <- future_chthamalus_visual_masked[[1]] 

# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
  filter(!(x > -8.2 & x < -5.3 &
           y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland 
  filter(!(x > -8.0 & x < -6.0 &
           y > 55.1 & y < 55.6))

colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_future_CM_visual_masked <- r_df

#plot
chthamalus_visual_future_plot <- ggplot() +

  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
  scale_fill_viridis_c(option = "viridis", 
                       na.value = "transparent", 
                       limits = c(0, 1)) +
  labs(fill = NULL) +   # remove legend title
  coord_sf(
        xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),   # ~80% of plot height
      barwidth  = unit(0.02, "npc")   # slim vertical bar
    )
  ) +
  theme(
    legend.position = "right",        # show colorbar on the right
    axis.title.x = element_blank(),   # remove x-axis title
    axis.title.y = element_blank(),   # remove y-axis title
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

chthamalus_visual_future_plot

Forecast 4 (Chthamalus - GBIF + eDNA)

myBiomodEMProjFuture_chthamalus_GBIF_eDNA<- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM_chthamalus_GBIF_eDNA,
  proj.name = 'FutureEM.chthamalus.combined',
  new.env = enviro_vars_future,
  models.chosen = c(
    'Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
    "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo"
  ),
  metric.binary = 'all',
  metric.filter = 'all'
)

-=-=-=-=-=-=-=-=-=-=-=-= Do Ensemble Models Projection -=-=-=-=-=-=-=-=-=-=-=-=

Creating suitable Workdir...

-=-=-=-=-=-=-=-=-=-=-=-=-= Do Single Models Projection -=-=-=-=-=-=-=-=-=-=-=-=-=

    > Projecting Chthamalus.montagui_PA1_RUN1_GAM ...
    > Projecting Chthamalus.montagui_PA1_RUN1_GLM ...
    > Projecting Chthamalus.montagui_PA1_RUN1_RF ...
    > Projecting Chthamalus.montagui_PA1_RUN1_MAXENT ...
    > Projecting Chthamalus.montagui_PA1_RUN2_GAM ...
    > Projecting Chthamalus.montagui_PA1_RUN2_GLM ...
    > Projecting Chthamalus.montagui_PA1_RUN2_RF ...
    > Projecting Chthamalus.montagui_PA1_RUN2_MAXENT ...

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

    > Projecting Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo ...
    > Projecting Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo ...

    > Building TSS binaries / filtered
    > Building ROC binaries / filtered

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
myBiomodEMProjFuture_chthamalus_GBIF_eDNA

-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.projection.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=

Projection directory : ./Chthamalus.montagui/FutureEM.chthamalus.combined


sp.name : Chthamalus.montagui

expl.var.names : ocean_temp ph wave_fetch


modeling.id : chthamalus_GBIF_eDNA ( 
./Chthamalus.montagui/Chthamalus.montagui.chthamalus_GBIF_eDNA.ensemble.models.out 
)

models.projected : 
Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo, Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo

available binary projection : TSS, ROC

available filtered projection : TSS, ROC

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
plot(myBiomodEMProjFuture_chthamalus_GBIF_eDNA)

# visualise
future_chthamalus_combined <- get_predictions(myBiomodEMProjFuture_chthamalus_GBIF_eDNA) # returns a rasterLayer
plot(future_chthamalus_combined)

# after rocky mask
future_chthamalus_combined_masked <- mask(future_chthamalus_combined, expanded_rock)
plot(future_chthamalus_combined_masked)

# Choose the first layer
r_layer <- future_chthamalus_combined_masked[[1]] 

# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
  filter(!(x > -8.2 & x < -5.3 &
           y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland 
  filter(!(x > -8.0 & x < -6.0 &
           y > 55.1 & y < 55.6))

colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_future_CM_combined_masked <- r_df

#plot
chthamalus_combined_future_plot <- ggplot() +

  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
  scale_fill_viridis_c(option = "viridis", 
                       na.value = "transparent", 
                       limits = c(0, 1)) +
  labs(fill = NULL) +   # remove legend title
  coord_sf(
        xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_minimal(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),   # ~80% of plot height
      barwidth  = unit(0.02, "npc")   # slim vertical bar
    )
  ) +
  theme(
    legend.position = "right",        # show colorbar on the right
    axis.title.x = element_blank(),   # remove x-axis title
    axis.title.y = element_blank(),   # remove y-axis title
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA)     
  )

chthamalus_combined_future_plot

Difference maps

#join the data together (future)
colnames(r_df_SM_visual_future) <- c("x", "y", "suitability_visual", "suitability_divide_visual")
colnames(r_df_future_SB_combined_masked) <- c("x", "y", "suitability_combined", "suitability_divide_combined")

r_df_SB_diff_future <- left_join(r_df_SM_visual_future, r_df_future_SB_combined_masked)
r_df_SB_diff_future$diff <- r_df_SB_diff_future$suitability_divide_combined - r_df_SB_diff_future$suitability_divide_visual

mean(r_df_SB_diff_future$diff)
[1] -0.05492191
# now for current vs future
SB_future_df <- r_df_future_SB_combined_masked %>% select(c(x,y,suitability_divide_combined))

SB_current_df <- r_df_SB_combined %>% select(c(x, y , suitability_divide_combined))

colnames(SB_future_df) <- c("x", "y", "suitability_future")
colnames(SB_current_df) <- c("x", "y", "suitability_current")

SB_current_future_diff <- left_join(SB_current_df, SB_future_df)
SB_current_future_diff$diff <- SB_current_future_diff$suitability_future - SB_current_future_diff$suitability_current

head(SB_current_future_diff)
           x      y suitability_current suitability_future   diff
1 -1.0749999 60.725               0.100              0.100  0.000
2 -0.9749999 60.725               0.182              0.092 -0.090
3 -0.8249999 60.725               0.258              0.114 -0.144
4 -0.7749999 60.725               0.124              0.069 -0.055
5 -0.7249999 60.725               0.070              0.131  0.061
6 -0.9749999 60.675               0.175              0.058 -0.117
mean(SB_current_future_diff$diff)
[1] -0.1877744
# map
SB_diff_plot_future <- ggplot() +
  
  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = SB_current_future_diff, aes(x = x, y = y, fill = diff)) +
  
  scale_fill_gradient2(
    low = "#F8333C",      # negative values
    mid = "white",    # 0
    high = "#0273CA",    # positive values
    midpoint = 0,     # center the scale at 0
    na.value = "transparent",
    breaks = c(min(SB_current_future_diff$diff, na.rm = TRUE), 0, max(SB_current_future_diff$diff, na.rm = TRUE)),
  labels = c("Loss", "No change", "Gain")
  ) +
  
  labs(fill = NULL#, 
       #title = "Semibalanus balanoides"
       ) +
  coord_sf(
    xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_bw(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),
      barwidth  = unit(0.02, "npc")
    )
  ) +
  theme(
    legend.position = "right",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA),
    plot.title = element_text(size = 10, face = "italic")
  )

SB_diff_plot_future

#join the data together (future)
colnames(r_df_future_CM_visual_masked) <- c("x", "y", "suitability_visual", "suitability_divide_visual")
colnames(r_df_future_CM_combined_masked) <- c("x", "y", "suitability_combined", "suitability_divide_combined")

r_df_CM_diff_future <- left_join(r_df_future_CM_visual_masked, r_df_future_CM_combined_masked)
r_df_CM_diff_future$diff <- r_df_CM_diff_future$suitability_divide_combined - r_df_CM_diff_future$suitability_divide_visual

mean(r_df_CM_diff_future$diff)
[1] -0.1413449
# now for current vs future
CM_future_df <- r_df_future_CM_combined_masked %>% select(c(x,y,suitability_divide_combined))

CM_current_df <- r_df_CM_combined %>% select(c(x, y , suitability_divide_combined))

colnames(CM_future_df) <- c("x", "y", "suitability_future")
colnames(CM_current_df) <- c("x", "y", "suitability_current")

CM_current_future_diff <- left_join(CM_current_df, CM_future_df)
CM_current_future_diff$diff <- CM_current_future_diff$suitability_future - CM_current_future_diff$suitability_current

head(CM_current_future_diff)
           x      y suitability_current suitability_future  diff
1 -1.0749999 60.725               0.023              0.620 0.597
2 -0.9749999 60.725               0.058              0.704 0.646
3 -0.8249999 60.725               0.046              0.683 0.637
4 -0.7749999 60.725               0.013              0.582 0.569
5 -0.7249999 60.725               0.009              0.598 0.589
6 -0.9749999 60.675               0.061              0.716 0.655
mean(CM_current_future_diff$diff)
[1] 0.4080141
# map
CM_diff_plot_future <- ggplot() +
  
  geom_sf(data = uk, fill = "grey98", color = "lightgray") +
  geom_sf(data = other_maps, fill = "white", color = "grey92") +
  geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
  
  geom_tile(data = CM_current_future_diff, aes(x = x, y = y, fill = diff)) +
  
  scale_fill_gradient2(
    low = "#F8333C",      # negative values
    mid = "white",    # 0
    high = "#0273CA",    # positive values
    midpoint = 0,     # center the scale at 0
    na.value = "transparent",
    breaks = c(min(CM_current_future_diff$diff, na.rm = TRUE), 0, max(CM_current_future_diff$diff, na.rm = TRUE)),
  labels = c("Loss", "No change", "Gain")
  ) +
  
  labs(fill = NULL#, 
       #title = "Chthamalus montagui"
       ) +
  coord_sf(
    xlim = c(-8, 3),
    ylim = c(49.7, 61),
    expand = FALSE
  ) +
  theme_bw(base_size = 14) +
  guides(
    fill = guide_colorbar(
      barheight = unit(0.5, "npc"),
      barwidth  = unit(0.02, "npc")
    )
  ) +
  theme(
    legend.position = "right",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_rect(fill = "white", colour = NA),  
    plot.background = element_rect(fill = "white", colour = NA),
    plot.title = element_text(size = 10, face = "italic")  
  )

CM_diff_plot_future

Joint maps

future_projections_plot <- cowplot::plot_grid(
  SB_diff_plot_future,
  CM_diff_plot_future,
  ncol = 2, align = "hv", label_fontface = "italic"
)

future_projections_plot

# Save the plot
ggsave(future_projections_plot, filename = "Figures/future_projections_plot.png", width = 14, height = 11, dpi = 300)

Lat slice plot

future_colour = "#FCAB10"
current_colour = "#2B9EB3"

#SB

# 1. Bin latitude (y) into 5 equal ranges
SB_current_future_diff <- SB_current_future_diff %>%
  mutate(lat_bin = cut(y, breaks = 5))

# 2. Reshape to long format for ggplot
SB_long <- SB_current_future_diff %>%
  pivot_longer(cols = starts_with("suitability"),
               names_to = "scenario",
               values_to = "suitability")

# 3. Compute mean suitability per latitude bin and scenario
SB_summary <- SB_long %>%
  group_by(lat_bin, scenario) %>%
  summarise(mean_suit = mean(suitability, na.rm = TRUE), 
            se = sd(suitability)/sqrt(n()),
            sd = sd(suitability))

#add species
SB_summary$species = "Semibalanus balanoides"
SB_long$species = "Semibalanus balanoides"

#CM

# 1. Bin latitude (y) into 5 equal ranges
CM_current_future_diff <- CM_current_future_diff %>%
  mutate(lat_bin = cut(y, breaks = 5))

# 2. Reshape to long format for ggplot
CM_long <- CM_current_future_diff %>%
  pivot_longer(cols = starts_with("suitability"),
               names_to = "scenario",
               values_to = "suitability")

# 3. Compute mean suitability per latitude bin and scenario
CM_summary <- CM_long %>%
  group_by(lat_bin, scenario) %>%
  summarise(mean_suit = mean(suitability, na.rm = TRUE), 
            se = sd(suitability)/sqrt(n()),
            sd = sd(suitability))

CM_summary$species = "Chthamalus montagui"
CM_long$species = "Chthamalus montagui"# long

long_both <- rbind(CM_long, SB_long)

lat_bin_summary <- rbind(SB_summary, CM_summary)
arrow_data <- lat_bin_summary %>%
  select(lat_bin, species, scenario, mean_suit) %>%
  pivot_wider(names_from = scenario, values_from = mean_suit) %>%
  rename(
    current = suitability_current,
    future  = suitability_future
  ) %>%
  mutate(
    future = if_else(future < current, future + 0.02, future - 0.02)
  )

# 1Ensure your main plotting dataset has species as a factor in the desired order
lat_bin_summary <- lat_bin_summary %>%
  mutate(species = factor(species, levels = c("Semibalanus balanoides", "Chthamalus montagui")))

# 2 Ensure arrow data also respects the same factor levels
arrow_data <- arrow_data %>%
  mutate(species = factor(species, levels = c("Semibalanus balanoides", "Chthamalus montagui")))

# 3 Plot
Lat_bin_line_plot <- ggplot(lat_bin_summary,
                            aes(
                              x = mean_suit,
                              y = lat_bin,
                              color = scenario,
                              shape = species
                            )) + 
  geom_segment(
    data = arrow_data,
    aes(x = current, xend = future, y = lat_bin, yend = lat_bin),
    arrow = arrow(length = unit(0.2, "cm")),
    color = "grey50"
  ) +
  geom_pointrange(aes(ymin = mean_suit, 
                      ymax = mean_suit),
                  size = 1) +
  facet_wrap(~species, ncol = 1) +   # facets follow factor order
  theme_bw() +
  theme(strip.text = element_text(face = "italic"),
        legend.position = "right") +
  labs(
    x = "Suitability score",
    y = "Latitude range",
    color = "Climate scenario",
    shape = "Species"
  ) +
  scale_color_manual(
    name = "Climate scenario",
    values = c("suitability_current" = current_colour,
               "suitability_future"  = future_colour),
    labels = c("suitability_current" = "Current",
               "suitability_future"  = "Future (2100)")
  ) +
  scale_y_discrete(
    labels = c(
      "(49.9,52]"   = "49.9 to 52°N",
      "(52,54.2]"   = "52 to 54.2°N",
      "(54.2,56.4]" = "54.2 to 56.4°N",
      "(56.4,58.6]" = "56.4 to 58.6°N",
      "(58.6,60.7]" = "58.6 to 60.7°N"
    ))

Lat_bin_line_plot

# Save the plot
ggsave(Lat_bin_line_plot, filename = "Figures/Lat_bin_line_plot.png", width = 8, height = 5, dpi = 300)
Lat_bin_line_plot_SB <- ggplot(lat_bin_summary %>% 
                                 filter(species == "Semibalanus balanoides"),
                            aes(
                              x = mean_suit,
                              y = lat_bin,
                              color = scenario
                            )) + 
  geom_pointrange(aes(ymin = mean_suit, 
                      ymax = mean_suit),
                  size = 1) +
  geom_segment(
    data = arrow_data %>% filter(species == "Semibalanus balanoides"),
    aes(x = current, xend = future, y = lat_bin, yend = lat_bin),
    arrow = arrow(length = unit(0.2, "cm")),
    color = "grey50"
  ) +
  theme_bw() +
  theme(strip.text = element_text(face = "italic"),
        legend.position = "right") +
  labs(
    x = "Suitability score",
    y = "Latitude range",
    color = "Climate scenario"
  ) +
  scale_color_manual(
    name = "Climate scenario",
    values = c("suitability_current" = current_colour,
               "suitability_future"  = future_colour),
    labels = c("suitability_current" = "Current",
               "suitability_future"  = "Future (2100)")
  ) +
  scale_y_discrete(
    labels = c(
      "(49.9,52]"   = "49.9 to 52°N",
      "(52,54.2]"   = "52 to 54.2°N",
      "(54.2,56.4]" = "54.2 to 56.4°N",
      "(56.4,58.6]" = "56.4 to 58.6°N",
      "(58.6,60.7]" = "58.6 to 60.7°N"
    ))+ xlim(0,1)

Lat_bin_line_plot_SB

Lat_bin_line_plot_CM <- ggplot(lat_bin_summary %>% 
                                 filter(species == "Chthamalus montagui"),
                            aes(
                              x = mean_suit,
                              y = lat_bin,
                              color = scenario
                            )) + 
  geom_pointrange(aes(ymin = mean_suit, 
                      ymax = mean_suit),
                  size = 1) +
  geom_segment(
    data = arrow_data %>% filter(species == "Chthamalus montagui"),
    aes(x = current, xend = future, y = lat_bin, yend = lat_bin),
    arrow = arrow(length = unit(0.2, "cm")),
    color = "grey50"
  ) +
  
  theme_bw() +
  theme(strip.text = element_text(face = "italic"),
        legend.position = "right") +
  labs(
    x = "Suitability score",
    y = "Latitude range",
    color = "Climate scenario"
  ) +
  scale_color_manual(
    name = "Climate scenario",
    values = c("suitability_current" = current_colour,
               "suitability_future"  = future_colour),
    labels = c("suitability_current" = "Current",
               "suitability_future"  = "Future (2100)")
  ) +
  scale_y_discrete(
    labels = c(
      "(49.9,52]"   = "49.9 to 52°N",
      "(52,54.2]"   = "52 to 54.2°N",
      "(54.2,56.4]" = "54.2 to 56.4°N",
      "(56.4,58.6]" = "56.4 to 58.6°N",
      "(58.6,60.7]" = "58.6 to 60.7°N"
    )) + xlim(0,1)

Lat_bin_line_plot_CM

# Left-aligned species labels
sb_label <- ggdraw() + 
  draw_label("Semibalanus balanoides", 
             fontface = "italic", 
             size = 14,
             hjust = 0,      # left-aligned
             x = 0.02)       # move slightly from edge

cm_label <- ggdraw() + 
  draw_label("Chthamalus montagui", 
             fontface = "italic", 
             size = 14,
             hjust = 0, 
             x = 0.02)
#top row
row_sb <- plot_grid(
  SB_diff_plot_future,
  Lat_bin_line_plot_SB,
  ncol = 2,
  labels = c("a", "b"),
  align = "hv"
)

#bottom row
row_cm <- plot_grid(
  CM_diff_plot_future,
  Lat_bin_line_plot_CM,
  ncol = 2,
  labels = c("c", "d"),
  align = "hv"
)


# Combine rows with labels (same layout as before)
forecast_plot_2 <- plot_grid(
  sb_label, row_sb,
  cm_label, row_cm,
  ncol = 1,
  rel_heights = c(0.08, 1, 0.08, 1),
  rel_widths = c(0.3, 1)
)

forecast_plot_2

# Save
ggsave(forecast_plot_2, filename = "Figures/forecast_plot2.png", width = 12, height = 8, dpi = 300)

Plots for write-up

Now let’s make our final comparison plots for the write-up. These mainly consist of boxpots.

Evaluation plots

#semibalanus

eval.semi.bal.individual.visual <- read.csv("Data/Processed_Data/Semi.bal/Individual-GBIF-GBIF/Evaluations_All.csv", row.names = 1)
eval.semi.bal.individual.visual$model_type = "individual_algo"
eval.semi.bal.individual.visual$dataset = "GBIF + GBIF"
eval.semi.bal.individual.visual$species = "Semibalanus balanoides"

eval.semi.bal.ensemble.visual <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-GBIF-GBIF/Evaluations_All.csv", row.names = 1)
eval.semi.bal.ensemble.visual$model_type = "ensemble"
eval.semi.bal.ensemble.visual$dataset = "GBIF + GBIF"
eval.semi.bal.ensemble.visual$species = "Semibalanus balanoides"

eval.semi.bal.individual.combined <- read.csv("Data/Processed_Data/Semi.bal/Individual-GBIF-eDNA/Evaluations_All.csv", row.names = 1)
eval.semi.bal.individual.combined$model_type = "individual_algo"
eval.semi.bal.individual.combined$dataset = "GBIF-eDNA"
eval.semi.bal.individual.combined$species = "Semibalanus balanoides"

eval.semi.bal.ensemble.combined <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-GBIF-eDNA/Evaluations_All.csv", row.names = 1)
eval.semi.bal.ensemble.combined$model_type = "ensemble"
eval.semi.bal.ensemble.combined$dataset = "GBIF-eDNA"
eval.semi.bal.ensemble.combined$species = "Semibalanus balanoides"

eval.semi.bal.individual.baseline <- read.csv("Data/Processed_Data/Semi.bal/Individual-baseline/Evaluations_All.csv", row.names = 1)
eval.semi.bal.individual.baseline$model_type = "individual_algo"
eval.semi.bal.individual.baseline$dataset = "Baseline"
eval.semi.bal.individual.baseline$species = "Semibalanus balanoides"

eval.semi.bal.ensemble.baseline <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-baseline/Evaluations_All.csv", row.names = 1)
eval.semi.bal.ensemble.baseline$model_type = "ensemble"
eval.semi.bal.ensemble.baseline$dataset = "Baseline"
eval.semi.bal.ensemble.baseline$species = "Semibalanus balanoides"

# chthamalus

eval.chth.mon.individual.visual <- read.csv("Data/Processed_Data/Chtham.mont/Individual-GBIF-GBIF/Evaluations_All.csv", row.names = 1)
eval.chth.mon.individual.visual$model_type = "individual_algo"
eval.chth.mon.individual.visual$dataset = "GBIF + GBIF"
eval.chth.mon.individual.visual$species = "Chthamalus montagui"

eval.chth.mon.ensemble.visual <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-GBIF-GBIF/Evaluations_All.csv", row.names = 1)
eval.chth.mon.ensemble.visual$model_type = "ensemble"
eval.chth.mon.ensemble.visual$dataset = "GBIF + GBIF"
eval.chth.mon.ensemble.visual$species = "Chthamalus montagui"

eval.chth.mon.individual.combined <- read.csv("Data/Processed_Data/Chtham.mont/Individual-GBIF-eDNA/Evaluations_All.csv", row.names = 1)
eval.chth.mon.individual.combined$model_type = "individual_algo"
eval.chth.mon.individual.combined$dataset = "GBIF-eDNA"
eval.chth.mon.individual.combined$species = "Chthamalus montagui"

eval.chth.mon.ensemble.combined <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-GBIF-eDNA/Evaluations_All.csv", row.names = 1)
eval.chth.mon.ensemble.combined$model_type = "ensemble"
eval.chth.mon.ensemble.combined$dataset = "GBIF-eDNA"
eval.chth.mon.ensemble.combined$species = "Chthamalus montagui"

eval.chth.mon.individual.baseline <- read.csv("Data/Processed_Data/Chtham.mont/Individual-baseline/Evaluations_All.csv", row.names = 1)
eval.chth.mon.individual.baseline$model_type = "individual_algo"
eval.chth.mon.individual.baseline$dataset = "Baseline"
eval.chth.mon.individual.baseline$species = "Chthamalus montagui"

eval.chth.mon.ensemble.baseline <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-baseline/Evaluations_All.csv", row.names = 1)
eval.chth.mon.ensemble.baseline$model_type = "ensemble"
eval.chth.mon.ensemble.baseline$dataset = "Baseline"
eval.chth.mon.ensemble.baseline$species = "Chthamalus montagui"

#join
neworder <- c("Semibalanus balanoides","Chthamalus montagui")

full_evals_ensemble<- rbind(eval.semi.bal.ensemble.visual,
                    eval.semi.bal.ensemble.combined,
                    eval.semi.bal.ensemble.baseline,
                    eval.chth.mon.ensemble.visual,
                    eval.chth.mon.ensemble.combined,
                    eval.chth.mon.ensemble.baseline)

full_evals_individual<- rbind(eval.semi.bal.individual.visual,
                    eval.semi.bal.individual.combined,
                    eval.semi.bal.individual.baseline,
                    eval.chth.mon.individual.visual,
                    eval.chth.mon.individual.combined,
                    eval.chth.mon.individual.baseline)

full_evals_individual <- arrange(transform(full_evals_individual,
             species=factor(species,levels=neworder)),species)

# filter for mean only in ensemble
full_evals_ensemble_mean <- full_evals_ensemble %>% filter(algo == "EMmean")

Let’s compare the datasets across the evaluation metrics.

GBIF_only_colour = "#FA230F"
GBIF_eDNA_colour = "#345995"
baseline_colour = "#EAC435"
eval_boxplot_ensemble <- ggplot(full_evals_ensemble_mean, aes(x = metric.eval, y = evaluation, fill = dataset)) + 
  geom_boxplot() +
  theme_bw() +
  facet_wrap(~factor(species, neworder)) +
  scale_fill_manual(values = c(
    "GBIF + GBIF" = GBIF_only_colour,
    "GBIF-eDNA" = GBIF_eDNA_colour,
    "Baseline" = baseline_colour
  )) +
  labs(x = "Metric", y = "Evaluation value", fill = "Dataset") +
  theme(legend.position = "none")

eval_boxplot_ensemble

ggsave(eval_boxplot_ensemble, file = "Figures/eval_boxplot_ensemble.png", height = 5, width = 8, dpi = 300)
eval_boxplot_individual<- ggplot(full_evals_individual, aes(x = metric.eval, y = evaluation, fill = dataset)) + 
  geom_boxplot() +
  theme_bw() +
  facet_wrap(~factor(species, neworder)) +
    scale_fill_manual(values = c(
    "GBIF + GBIF" = GBIF_only_colour,
    "GBIF-eDNA" = GBIF_eDNA_colour,
    "Baseline" = baseline_colour
  )) +
  labs(x = "Metric", y = "Evaluation value", fill = "Dataset")

eval_boxplot_individual

ggsave(eval_boxplot_individual, file = "Figures/eval_boxplot_individual.png", height = 5, width = 8, dpi = 300)
eval_boxplot_together <- plot_grid(eval_boxplot_individual, eval_boxplot_ensemble, labels = c("a", "b"), ncol = 1, align = "hv")

eval_boxplot_together

ggsave(eval_boxplot_together, file = "Figures/eval_boxplot_together.png", height = 7, width = 7, dpi = 300)
eval_boxplot_individual_models<- ggplot(full_evals_individual, aes(x = metric.eval, y = evaluation, fill = algo)) + 
  geom_boxplot() +
  theme_bw() +
  facet_wrap(~factor(species, neworder)) +
  labs(x = "Metric", y = "Evaluation value", fill = "Dataset")

eval_boxplot_individual_models

ggsave(eval_boxplot_individual_models, file = "Figures/eval_boxplot_individual_models.png", height = 5, width = 8, dpi = 300)
eval_boxplot_individual_models_data<- ggplot(full_evals_individual, aes(x = metric.eval, y = evaluation, fill = algo, colour = dataset)) + 
  geom_boxplot() +
  theme_bw() +
  facet_wrap(~factor(species, neworder)) +
  labs(x = "Metric", y = "Evaluation value", fill = "Algo")

eval_boxplot_individual_models_data

ggsave(eval_boxplot_individual_models_data, file = "Figures/eval_boxplot_individual_models_data.png", height = 5, width = 8, dpi = 300)
eval_boxplot_individual_models_data_TSS <- ggplot(
  full_evals_individual %>% filter(metric.eval == "TSS"),
  aes(x = algo, y = evaluation, fill = dataset)
) +
  geom_boxplot() +
  theme_bw() +
  facet_wrap( ~ factor(species, neworder)) +
  labs(x = "Algorithm", y = "TSS", fill = "Dataset") +
  scale_fill_manual(values = c(
    "GBIF + GBIF" = GBIF_only_colour,
    "GBIF-eDNA" = GBIF_eDNA_colour,
    "Baseline" = baseline_colour
  )) +
  geom_hline(yintercept=0.4, linetype='dashed', col = 'red')

eval_boxplot_individual_models_data_TSS

eval_boxplot_individual_models_data_ROC <- ggplot(
  full_evals_individual %>% filter(metric.eval == "ROC"),
  aes(x = algo, y = evaluation, fill = dataset)
) +
  geom_boxplot() +
  theme_bw() +
  facet_wrap( ~ factor(species, neworder)) +
  labs(x = "Algorithm", y = "ROC", fill = "Dataset")+
  scale_fill_manual(values = c(
    "GBIF + GBIF" = GBIF_only_colour,
    "GBIF-eDNA" = GBIF_eDNA_colour,
    "Baseline" = baseline_colour
  )) +
  geom_hline(yintercept=0.7, linetype='dashed', col = 'red')

eval_boxplot_individual_models_data_ROC

eval_boxplot_together_algo <- plot_grid(eval_boxplot_individual_models_data_ROC, eval_boxplot_individual_models_data_TSS, labels = c("a", "b"), ncol = 1)

eval_boxplot_together_algo

ggsave(eval_boxplot_together_algo, file = "Figures/eval_boxplot_together_algo.png", height = 8, width = 8, dpi = 300)

Let’s compare ensemble vs individual.

full_evals_individual_reduced <- full_evals_individual %>% select(c(full.name, evaluation, dataset, species, metric.eval, model_type))

full_evals_ensemble_mean <- full_evals_ensemble_mean %>% select(c(full.name, evaluation, dataset, species, metric.eval, model_type))

eval_reduced_joined <- rbind(full_evals_individual_reduced, full_evals_ensemble_mean)

#summary data

summary_eval <- eval_reduced_joined %>% 
  group_by(dataset, model_type, metric.eval,species) %>% 
  summarise(mean = mean(evaluation),
            se = sd(evaluation)/sqrt(n()))

summary_eval
# A tibble: 24 × 6
# Groups:   dataset, model_type, metric.eval [12]
   dataset     model_type      metric.eval species                 mean     se
   <chr>       <chr>           <chr>       <chr>                  <dbl>  <dbl>
 1 Baseline    ensemble        ROC         Chthamalus montagui    0.834 0     
 2 Baseline    ensemble        ROC         Semibalanus balanoides 0.688 0.0145
 3 Baseline    ensemble        TSS         Chthamalus montagui    0.557 0     
 4 Baseline    ensemble        TSS         Semibalanus balanoides 0.343 0.0110
 5 Baseline    individual_algo ROC         Chthamalus montagui    0.818 0.0109
 6 Baseline    individual_algo ROC         Semibalanus balanoides 0.595 0.0229
 7 Baseline    individual_algo TSS         Chthamalus montagui    0.580 0.0196
 8 Baseline    individual_algo TSS         Semibalanus balanoides 0.248 0.0302
 9 GBIF + GBIF ensemble        ROC         Chthamalus montagui    0.848 0     
10 GBIF + GBIF ensemble        ROC         Semibalanus balanoides 0.669 0.0155
# ℹ 14 more rows
eval_boxplot_join_data_TSS <- ggplot(
  eval_reduced_joined %>% filter(metric.eval == "TSS"),
  aes(x = model_type, y = evaluation, fill = dataset)
) +
  geom_boxplot() +
  theme_bw() +
  facet_wrap( ~ factor(species, neworder)) +
  labs(x = "Model type", y = "TSS", fill = "Dataset") +
  scale_fill_manual(values = c(
    "GBIF + GBIF" = GBIF_only_colour,
    "GBIF-eDNA" = GBIF_eDNA_colour,
    "Baseline" = baseline_colour
  ))+
  scale_x_discrete(labels = c("Ensemble", "Individual")) +
  theme(legend.position = "none")

eval_boxplot_join_data_TSS

eval_boxplot_join_data_ROC <- ggplot(
  eval_reduced_joined %>% filter(metric.eval == "ROC"),
  aes(x = model_type, y = evaluation, fill = dataset)
) +
  geom_boxplot() +
  theme_bw() +
  facet_wrap( ~ factor(species, neworder)) +
  labs(x = "Model type", y = "ROC", fill = "Dataset") +
  scale_fill_manual(values = c(
    "GBIF + GBIF" = GBIF_only_colour,
    "GBIF-eDNA" = GBIF_eDNA_colour,
    "Baseline" = baseline_colour
  )) +
  scale_x_discrete(labels = c("Ensemble", "Individual"))

eval_boxplot_join_data_ROC

eval_boxplot_together_both <- plot_grid(eval_boxplot_join_data_ROC, eval_boxplot_join_data_TSS, labels = c("a", "b"), ncol = 1, align = "hv")

eval_boxplot_together_both

ggsave(eval_boxplot_together_both, file = "Figures/eval_boxplot_together_both.png", height = 8, width = 8, dpi = 300)
mean_range_plot_mods_TSS <- ggplot(
  summary_eval %>%
    filter(metric.eval == "TSS") %>%
    mutate(dataset = factor(
      dataset, levels = c("Baseline", "GBIF + GBIF", "GBIF-eDNA")
    )),
  aes(x = model_type, y = mean, colour = dataset)
) +
  geom_pointrange(aes(ymin = mean - (se*2), ymax = mean + (se*2)),
                  size = 1.5,
                  position = position_dodge(width = 0.4)) +   # dodge by dataset
  labs(x = "Model type", y = "TSS", colour = "Dataset") +
  facet_wrap( ~ factor(species, neworder)) +
  scale_x_discrete(
    limits = c("individual_algo", "ensemble"),
    labels = c("Individual", "Ensemble")
  ) +   # reorder x-axis
  scale_colour_manual(
    values = c("GBIF + GBIF" = GBIF_only_colour, 
               "GBIF-eDNA" = GBIF_eDNA_colour,
                "Baseline" = baseline_colour),
    breaks = c("Baseline", "GBIF + GBIF", "GBIF-eDNA"), # reorder datasets
    labels = c("GBIF + GBIF" = "GBIF + GBIF", 
               "GBIF-eDNA" = "GBIF + eDNA",
               "Baseline" = "GBIF baseline")
  ) +            
  theme_bw() +
  theme(text = element_text(size = 13), legend.position = "none",
        strip.text = element_text(face = "italic")) 

mean_range_plot_mods_TSS

mean_range_plot_mods_roc <- ggplot(
  summary_eval %>%
    filter(metric.eval == "ROC") %>%
    mutate(dataset = factor(
      dataset, levels = c("Baseline", "GBIF + GBIF", "GBIF-eDNA")
    )),
  aes(x = model_type, y = mean, colour = dataset)
) +
  geom_pointrange(aes(ymin = mean - (se*2), ymax = mean + (se*2)),
                  size = 1.5,
                  position = position_dodge(width = 0.4)) +   # dodge by dataset
  labs(x = "Model type", y = "ROC", colour = "Dataset") +
  facet_wrap( ~ factor(species, neworder)) +
  scale_x_discrete(
    limits = c("individual_algo", "ensemble"),
    labels = c("Individual", "Ensemble")
  ) +   # reorder x-axis
  scale_colour_manual(
    values = c("GBIF + GBIF" = GBIF_only_colour, 
               "GBIF-eDNA" = GBIF_eDNA_colour,
              "Baseline" = baseline_colour),
    breaks = c("Baseline", "GBIF + GBIF", "GBIF-eDNA"),# reorder datasets
    labels = c("GBIF + GBIF" = "GBIF baseline + GBIF additional", 
               "GBIF-eDNA" = "GBIF baseline + eDNA",
               "Baseline" = "GBIF baseline")
  ) +            
  theme_bw() +
  theme(text = element_text(size = 13),
        strip.text = element_text(face = "italic")) 

mean_range_plot_mods_roc

eval_boxplot_together_both_meanSE <- plot_grid(mean_range_plot_mods_roc, mean_range_plot_mods_TSS, labels = c("a", "b"), ncol = 1, align = "hv")

eval_boxplot_together_both_meanSE

ggsave(eval_boxplot_together_both_meanSE, file = "Figures/eval_boxplot_together_both_meanSE.png", height = 10, width = 11, dpi = 300)
summary_eval2 <- eval_reduced_joined %>% 
  group_by(dataset, model_type, metric.eval) %>% 
  summarise(mean = mean(evaluation),
            se = sd(evaluation)/sqrt(n()))

summary_eval2
# A tibble: 12 × 5
# Groups:   dataset, model_type [6]
   dataset     model_type      metric.eval  mean     se
   <chr>       <chr>           <chr>       <dbl>  <dbl>
 1 Baseline    ensemble        ROC         0.761 0.0427
 2 Baseline    ensemble        TSS         0.45  0.0619
 3 Baseline    individual_algo ROC         0.706 0.0236
 4 Baseline    individual_algo TSS         0.414 0.0347
 5 GBIF + GBIF ensemble        ROC         0.758 0.0522
 6 GBIF + GBIF ensemble        TSS         0.464 0.0686
 7 GBIF + GBIF individual_algo ROC         0.703 0.0239
 8 GBIF + GBIF individual_algo TSS         0.404 0.0345
 9 GBIF-eDNA   ensemble        ROC         0.756 0.0505
10 GBIF-eDNA   ensemble        TSS         0.459 0.0523
11 GBIF-eDNA   individual_algo ROC         0.706 0.0247
12 GBIF-eDNA   individual_algo TSS         0.408 0.0340

Variable importance plots

#semibalanus

var.semi.bal.individual.visual <- read.csv("Data/Processed_Data/Semi.bal/Individual-GBIF-GBIF/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.individual.visual$model_type = "individual_algo"
var.semi.bal.individual.visual$dataset = "GBIF + GBIF"
var.semi.bal.individual.visual$species = "Semibalanus balanoides"

var.semi.bal.ensemble.visual <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-GBIF-GBIF/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.ensemble.visual$model_type = "ensemble"
var.semi.bal.ensemble.visual$dataset = "GBIF + GBIF"
var.semi.bal.ensemble.visual$species = "Semibalanus balanoides"

var.semi.bal.individual.combined <- read.csv("Data/Processed_Data/Semi.bal/Individual-GBIF-eDNA/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.individual.combined$model_type = "individual_algo"
var.semi.bal.individual.combined$dataset = "GBIF-eDNA"
var.semi.bal.individual.combined$species = "Semibalanus balanoides"

var.semi.bal.ensemble.combined <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-GBIF-eDNA/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.ensemble.combined$model_type = "ensemble"
var.semi.bal.ensemble.combined$dataset = "GBIF-eDNA"
var.semi.bal.ensemble.combined$species = "Semibalanus balanoides"

var.semi.bal.individual.baseline <- read.csv("Data/Processed_Data/Semi.bal/Individual-baseline/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.individual.baseline$model_type = "individual_algo"
var.semi.bal.individual.baseline$dataset = "Baseline"
var.semi.bal.individual.baseline$species = "Semibalanus balanoides"

var.semi.bal.ensemble.baseline <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-baseline/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.ensemble.baseline$model_type = "ensemble"
var.semi.bal.ensemble.baseline$dataset = "Baseline"
var.semi.bal.ensemble.baseline$species = "Semibalanus balanoides"

# chthamalus

var.chth.mon.individual.visual <- read.csv("Data/Processed_Data/Chtham.mont/Individual-GBIF-GBIF/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.individual.visual$model_type = "individual_algo"
var.chth.mon.individual.visual$dataset = "GBIF + GBIF"
var.chth.mon.individual.visual$species = "Chthamalus montagui"

var.chth.mon.ensemble.visual <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-GBIF-GBIF/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.ensemble.visual$model_type = "ensemble"
var.chth.mon.ensemble.visual$dataset = "GBIF + GBIF"
var.chth.mon.ensemble.visual$species = "Chthamalus montagui"

var.chth.mon.individual.combined <- read.csv("Data/Processed_Data/Chtham.mont/Individual-GBIF-eDNA/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.individual.combined$model_type = "individual_algo"
var.chth.mon.individual.combined$dataset = "GBIF-eDNA"
var.chth.mon.individual.combined$species = "Chthamalus montagui"

var.chth.mon.ensemble.combined <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-GBIF-eDNA/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.ensemble.combined$model_type = "ensemble"
var.chth.mon.ensemble.combined$dataset = "GBIF-eDNA"
var.chth.mon.ensemble.combined$species = "Chthamalus montagui"

var.chth.mon.individual.baseline <- read.csv("Data/Processed_Data/Chtham.mont/Individual-baseline/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.individual.baseline$model_type = "individual_algo"
var.chth.mon.individual.baseline$dataset = "Baseline"
var.chth.mon.individual.baseline$species = "Chthamalus montagui"

var.chth.mon.ensemble.baseline <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-baseline/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.ensemble.baseline$model_type = "ensemble"
var.chth.mon.ensemble.baseline$dataset = "Baseline"
var.chth.mon.ensemble.baseline$species = "Chthamalus montagui"


#join
neworder <- c("Semibalanus balanoides","Chthamalus montagui")

full_vars_ensemble<- rbind(var.semi.bal.ensemble.visual,
                    var.semi.bal.ensemble.combined,
                     var.semi.bal.ensemble.baseline,
                    var.chth.mon.ensemble.visual,
                    var.chth.mon.ensemble.combined,
                    var.chth.mon.ensemble.baseline)

full_vars_individual<- rbind(var.semi.bal.individual.visual,
                    var.semi.bal.individual.combined,
                    var.semi.bal.individual.baseline,
                    var.chth.mon.individual.visual,
                    var.chth.mon.individual.combined,
                    var.chth.mon.individual.baseline)

full_vars_individual <- arrange(transform(full_vars_individual,
             species=factor(species,levels=neworder)),species)

# filter for mean only in ensemble
full_vars_ensemble_mean <- full_vars_ensemble %>% filter(algo == "EMmean")
vars_boxplot_ensemble<- ggplot(full_vars_ensemble_mean  %>%
    mutate(dataset = factor(
      dataset, levels = c("Baseline", "GBIF + GBIF", "GBIF-eDNA")
    )), aes(x = expl.var, y = var.imp, fill = dataset)) + 
  geom_boxplot() +
  theme_bw() +
  facet_wrap(~factor(species, neworder)) +
  scale_fill_manual(
    values = c(
      "GBIF + GBIF" = GBIF_only_colour,
      "GBIF-eDNA" = GBIF_eDNA_colour,
      "Baseline" = baseline_colour
    ),
    breaks = c("Baseline", "GBIF + GBIF", "GBIF-eDNA"), # reorder datasets
    labels = c(
      "GBIF + GBIF" = "GBIF baseline + GBIF additional",
      "GBIF-eDNA" = "GBIF baseline + eDNA",
      "Baseline" = "GBIF baseline"
    )
  ) +
  labs(x = "Environmental variable", y = "Variable importance", fill = "Dataset")+
  scale_x_discrete(labels = c("SST", "pH", "Wave fetch"))+
  theme(strip.text = element_text(face = "italic"),
        legend.position = "bottom")

vars_boxplot_ensemble

ggsave(vars_boxplot_ensemble, file = "Figures/vars_boxplot_ensemble.png", height = 5, width = 6, dpi = 300)
vars_boxplot_individual <- ggplot(
  full_vars_individual %>%
    mutate(dataset = factor(
      dataset, levels = c("Baseline", "GBIF + GBIF", "GBIF-eDNA")
    )),
  aes(x = expl.var, y = var.imp, fill = dataset)
) +
  geom_boxplot() +
  theme_bw() +
  facet_grid(algo ~ factor(species, neworder)) +
  labs(x = "Environmental variable", y = "Variable importance", fill = "Dataset") +
  scale_fill_manual(
    values = c(
      "GBIF + GBIF" = GBIF_only_colour,
      "GBIF-eDNA" = GBIF_eDNA_colour,
      "Baseline" = baseline_colour
    ),
    breaks = c("Baseline", "GBIF + GBIF", "GBIF-eDNA"), # reorder datasets
    labels = c(
      "GBIF + GBIF" = "GBIF baseline + GBIF additional",
      "GBIF-eDNA" = "GBIF baseline + eDNA",
      "Baseline" = "GBIF baseline"
    )
  ) +
  scale_x_discrete(labels = c("SST", "pH", "Wave fetch")) +
  theme(strip.text = element_text(face = "italic"),
        legend.position = "bottom")

vars_boxplot_individual

ggsave(vars_boxplot_individual, file = "Figures/vars_boxplot_individual.png", height = 9, width = 7, dpi = 300)

Response curve pots

SB (GBIF + GBIF)
# Semibalanus - GBIF + GBIF

response_mean_SB_visual <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_SB_visual <- response_mean_SB_visual$tab

response_mean_SB_visual_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_SB_visual_ensemble <- response_mean_SB_visual_ensemble$tab
tab_mean_SB_visual_ensemble_reduced <- tab_mean_SB_visual_ensemble %>% 
  filter(pred.name == "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo")

tab_mean_SB_visual_reduced <- tab_mean_SB_visual %>% 
  filter(pred.name == c("Semibalanus.balanoides_allData_allRun_GLM",
                        "Semibalanus.balanoides_allData_allRun_MAXENT",
                        "Semibalanus.balanoides_allData_allRun_RF",
                        "Semibalanus.balanoides_allData_allRun_GAM"))

tab_mean_SB_visual_reduced_add <- rbind(tab_mean_SB_visual_reduced, tab_mean_SB_visual_ensemble_reduced)

plot_mean_SB_visual <- ggplot(
  tab_mean_SB_visual_reduced_add,
  aes(x = expl.val, y = pred.val, color = pred.name)
) +
  geom_line(linewidth = 1) +
  facet_wrap(
    ~expl.name,
    scales = "free_x",
    labeller = labeller(expl.name = c(
      "ocean_temp" = "SST",
      "ph"  = "pH",
      "wave_fetch" = "Wave fetch"
    ))
  ) +
  scale_color_manual(
    values = c("Semibalanus.balanoides_allData_allRun_GAM" = "blue", 
               "Semibalanus.balanoides_allData_allRun_GLM" = "red", 
               "Semibalanus.balanoides_allData_allRun_RF" = "green", 
               "Semibalanus.balanoides_allData_allRun_MAXENT" = "purple",
    "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
    labels = c("Semibalanus.balanoides_allData_allRun_GAM" = "GAM", 
               "Semibalanus.balanoides_allData_allRun_GLM" = "GLM", 
               "Semibalanus.balanoides_allData_allRun_RF" = "RF", 
               "Semibalanus.balanoides_allData_allRun_MAXENT" = "MAXENT",
               "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
    name = "Model"
  ) +
  labs(x = "Predictor", y = "Response") +
  theme_bw()

plot_mean_SB_visual

SB (GBIF + eDNA)
# Semibalanus - GBIF + eDNA

response_mean_SB_combined <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_SB_combined <- response_mean_SB_combined$tab

response_mean_SB_combined_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_SB_combined_ensemble <- response_mean_SB_combined_ensemble$tab
tab_mean_SB_combined_ensemble_reduced <- tab_mean_SB_combined_ensemble %>% 
  filter(pred.name == "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo")

tab_mean_SB_combined_reduced <- tab_mean_SB_combined %>% 
  filter(pred.name == c("Semibalanus.balanoides_allData_allRun_GLM",
                        "Semibalanus.balanoides_allData_allRun_MAXENT",
                        "Semibalanus.balanoides_allData_allRun_RF",
                        "Semibalanus.balanoides_allData_allRun_GAM"))

tab_mean_SB_combined_reduced_add <- rbind(tab_mean_SB_combined_reduced, tab_mean_SB_combined_ensemble_reduced)

plot_mean_SB_combined <- ggplot(
  tab_mean_SB_combined_reduced_add,
  aes(x = expl.val, y = pred.val, color = pred.name)
) +
  geom_line(linewidth = 1) +
  facet_wrap(
    ~expl.name,
    scales = "free_x",
    labeller = labeller(expl.name = c(
      "ocean_temp" = "SST",
      "ph"  = "pH",
      "wave_fetch" = "Wave fetch"
    ))
  ) +
  scale_color_manual(
    values = c("Semibalanus.balanoides_allData_allRun_GAM" = "blue", 
               "Semibalanus.balanoides_allData_allRun_GLM" = "red", 
               "Semibalanus.balanoides_allData_allRun_RF" = "green", 
               "Semibalanus.balanoides_allData_allRun_MAXENT" = "purple",
    "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
    labels = c("Semibalanus.balanoides_allData_allRun_GAM" = "GAM", 
               "Semibalanus.balanoides_allData_allRun_GLM" = "GLM", 
               "Semibalanus.balanoides_allData_allRun_RF" = "RF", 
               "Semibalanus.balanoides_allData_allRun_MAXENT" = "MAXENT",
               "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
    name = "Model"
  ) +
  labs(x = "Predictor", y = "Response") +
  theme_bw()+
  theme(legend.position = "none")

plot_mean_SB_combined

SB (GBIF baseline)
# Semibalanus -baseline

response_mean_SB_baseline <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_SB_baseline <- response_mean_SB_baseline$tab

response_mean_SB_baseline_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_SB_baseline_ensemble <- response_mean_SB_baseline_ensemble$tab
tab_mean_SB_baseline_ensemble_reduced <- tab_mean_SB_baseline_ensemble %>% 
  filter(pred.name == "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo")

tab_mean_SB_baseline_reduced <- tab_mean_SB_baseline %>% 
  filter(pred.name == c("Semibalanus.balanoides_allData_allRun_GLM",
                        "Semibalanus.balanoides_allData_allRun_MAXENT",
                        "Semibalanus.balanoides_allData_allRun_RF",
                        "Semibalanus.balanoides_allData_allRun_GAM"))

tab_mean_SB_baseline_reduced_add <- rbind(tab_mean_SB_baseline_reduced, tab_mean_SB_baseline_ensemble_reduced)

plot_mean_SB_baseline <- ggplot(
  tab_mean_SB_baseline_reduced_add,
  aes(x = expl.val, y = pred.val, color = pred.name)
) +
  geom_line(linewidth = 1) +
  facet_wrap(
    ~expl.name,
    scales = "free_x",
    labeller = labeller(expl.name = c(
      "ocean_temp" = "SST",
      "ph"  = "pH",
      "wave_fetch" = "Wave fetch"
    ))
  ) +
  scale_color_manual(
    values = c("Semibalanus.balanoides_allData_allRun_GAM" = "blue", 
               "Semibalanus.balanoides_allData_allRun_GLM" = "red", 
               "Semibalanus.balanoides_allData_allRun_RF" = "green", 
               "Semibalanus.balanoides_allData_allRun_MAXENT" = "purple",
    "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
    labels = c("Semibalanus.balanoides_allData_allRun_GAM" = "GAM", 
               "Semibalanus.balanoides_allData_allRun_GLM" = "GLM", 
               "Semibalanus.balanoides_allData_allRun_RF" = "RF", 
               "Semibalanus.balanoides_allData_allRun_MAXENT" = "MAXENT",
               "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
    name = "Model"
  ) +
  labs(x = "Predictor", y = "Response") +
  theme_bw()+
  theme(legend.position = "none")

plot_mean_SB_baseline

CM (GBIF + GBIF)
# Chthamalus - GBIF + GBIF

response_mean_CM_visual <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_CM_visual <- response_mean_CM_visual$tab

response_mean_CM_visual_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_CM_visual_ensemble <- response_mean_CM_visual_ensemble$tab
tab_mean_CM_visual_ensemble_reduced <- tab_mean_CM_visual_ensemble %>% 
  filter(pred.name == "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo")

tab_mean_CM_visual_reduced <- tab_mean_CM_visual %>% 
  filter(pred.name == c("Chthamalus.montagui_allData_allRun_GLM",
                        "Chthamalus.montagui_allData_allRun_MAXENT",
                        "Chthamalus.montagui_allData_allRun_RF",
                        "Chthamalus.montagui_allData_allRun_GAM"))

tab_mean_CM_visual_reduced_add <- rbind(tab_mean_CM_visual_reduced, tab_mean_CM_visual_ensemble_reduced)

plot_mean_CM_visual <- ggplot(
  tab_mean_CM_visual_reduced_add,
  aes(x = expl.val, y = pred.val, color = pred.name)
) +
  geom_line(linewidth = 1) +
  facet_wrap(
    ~expl.name,
    scales = "free_x",
    labeller = labeller(expl.name = c(
      "ocean_temp" = "SST",
      "ph"  = "pH",
      "wave_fetch" = "Wave fetch"
    ))
  ) +
  scale_color_manual(
    values = c("Chthamalus.montagui_allData_allRun_GAM" = "blue", 
               "Chthamalus.montagui_allData_allRun_GLM" = "red", 
               "Chthamalus.montagui_allData_allRun_RF" = "green", 
               "Chthamalus.montagui_allData_allRun_MAXENT" = "purple",
    "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
    labels = c("Chthamalus.montagui_allData_allRun_GAM" = "GAM", 
               "Chthamalus.montagui_allData_allRun_GLM" = "GLM", 
               "Chthamalus.montagui_allData_allRun_RF" = "RF", 
               "Chthamalus.montagui_allData_allRun_MAXENT" = "MAXENT",
               "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
    name = "Model"
  ) +
  labs(x = "Predictor", y = "Response") +
  theme_bw()

plot_mean_CM_visual

CM (GBIF + eDNA)
# Chthamalus - GBIF + eDNA

response_mean_CM_combined <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_CM_combined <- response_mean_CM_combined$tab

response_mean_CM_combined_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_CM_combined_ensemble <- response_mean_CM_combined_ensemble$tab
tab_mean_CM_combined_ensemble_reduced <- tab_mean_CM_combined_ensemble %>% 
  filter(pred.name == "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo")

tab_mean_CM_combined_reduced <- tab_mean_CM_combined %>% 
  filter(pred.name == c("Chthamalus.montagui_allData_allRun_GLM",
                        "Chthamalus.montagui_allData_allRun_MAXENT",
                        "Chthamalus.montagui_allData_allRun_RF",
                        "Chthamalus.montagui_allData_allRun_GAM"))

tab_mean_CM_combined_reduced_add <- rbind(tab_mean_CM_combined_reduced, tab_mean_CM_combined_ensemble_reduced)

plot_mean_CM_combined <- ggplot(
  tab_mean_CM_combined_reduced_add,
  aes(x = expl.val, y = pred.val, color = pred.name)
) +
  geom_line(linewidth = 1) +
  facet_wrap(
    ~expl.name,
    scales = "free_x",
    labeller = labeller(expl.name = c(
      "ocean_temp" = "SST",
      "ph"  = "pH",
      "wave_fetch" = "Wave fetch"
    ))
  ) +
  scale_color_manual(
    values = c("Chthamalus.montagui_allData_allRun_GAM" = "blue", 
               "Chthamalus.montagui_allData_allRun_GLM" = "red", 
               "Chthamalus.montagui_allData_allRun_RF" = "green", 
               "Chthamalus.montagui_allData_allRun_MAXENT" = "purple",
    "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
    labels = c("Chthamalus.montagui_allData_allRun_GAM" = "GAM", 
               "Chthamalus.montagui_allData_allRun_GLM" = "GLM", 
               "Chthamalus.montagui_allData_allRun_RF" = "RF", 
               "Chthamalus.montagui_allData_allRun_MAXENT" = "MAXENT",
               "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
    name = "Model"
  ) +
  labs(x = "Predictor", y = "Response") +
  theme_bw() +
  theme(legend.position = "none")

plot_mean_CM_combined

CM (GBIF baseline)
# Chthamalus - baseline

response_mean_CM_baseline <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_CM_baseline <- response_mean_CM_baseline$tab

response_mean_CM_baseline_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, 
                      models.chosen = "all",
                      fixed.var = 'mean')

tab_mean_CM_baseline_ensemble <- response_mean_CM_baseline_ensemble$tab
tab_mean_CM_baseline_ensemble_reduced <- tab_mean_CM_baseline_ensemble %>% 
  filter(pred.name == "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo")

tab_mean_CM_baseline_reduced <- tab_mean_CM_baseline %>% 
  filter(pred.name == c("Chthamalus.montagui_allData_allRun_GLM",
                        "Chthamalus.montagui_allData_allRun_MAXENT",
                        "Chthamalus.montagui_allData_allRun_RF",
                        "Chthamalus.montagui_allData_allRun_GAM"))

tab_mean_CM_baseline_reduced_add <- rbind(tab_mean_CM_baseline_reduced, tab_mean_CM_baseline_ensemble_reduced)

plot_mean_CM_baseline <- ggplot(
  tab_mean_CM_baseline_reduced_add,
  aes(x = expl.val, y = pred.val, color = pred.name)
) +
  geom_line(linewidth = 1) +
  facet_wrap(
    ~expl.name,
    scales = "free_x",
    labeller = labeller(expl.name = c(
      "ocean_temp" = "SST",
      "ph"  = "pH",
      "wave_fetch" = "Wave fetch"
    ))
  ) +
  scale_color_manual(
    values = c("Chthamalus.montagui_allData_allRun_GAM" = "blue", 
               "Chthamalus.montagui_allData_allRun_GLM" = "red", 
               "Chthamalus.montagui_allData_allRun_RF" = "green", 
               "Chthamalus.montagui_allData_allRun_MAXENT" = "purple",
    "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
    labels = c("Chthamalus.montagui_allData_allRun_GAM" = "GAM", 
               "Chthamalus.montagui_allData_allRun_GLM" = "GLM", 
               "Chthamalus.montagui_allData_allRun_RF" = "RF", 
               "Chthamalus.montagui_allData_allRun_MAXENT" = "MAXENT",
               "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
    name = "Model"
  ) +
  labs(x = "Predictor", y = "Response") +
  theme_bw() +
  theme(legend.position = "none")

plot_mean_CM_baseline

Plot together
response_curves_together_SB <- plot_grid(plot_mean_SB_baseline,
                                         plot_mean_SB_visual,
                                        plot_mean_SB_combined,
                                      ncol = 1,
                                      align = "hv")
response_curves_together_SB

ggsave(response_curves_together_SB, file = "Figures/response_curves_together_SB.png", height = 12, width = 10, dpi = 300)

response_curves_together_CM <- plot_grid(plot_mean_CM_baseline,
                                         plot_mean_CM_visual,
                                      plot_mean_CM_combined,
                                      labels = c("a", "b", "c"),
                                      ncol = 1,
                                      align = "hv")
response_curves_together_CM

ggsave(response_curves_together_CM, file = "Figures/response_curves_together_CM.png", height = 12, width = 10, dpi = 300)

Concluding remarks

We have now produced all the analyses for chapter 4.